// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

#include "degub.h"
#include "common.h"
#include "opcode.h"
#include "recompiler.h"
#include "recops.h"

//----------------------------------SPECIAL-------------------------------------

// Handler used for opcodes that have no recompiler implementation.
// Calls add_pos_fatal_error() with REC_UNDEFINED_OPCODE and then applies the
// SET_IT_ERROR macro (defined elsewhere; presumably marks the instruction
// type as an error - confirm against recompiler.h).
void Recompiler::__undefined() {
	add_pos_fatal_error(REC_UNDEFINED_OPCODE);
	SET_IT_ERROR;
}

//----------------------------------OPCODES-------------------------------------

void Recompiler::_addis() {
	D_rD; D_rA; D_SIMM;

	//r.gpr[rD] = rA_0 + (int(SIMM) << 16);
	if(rA == 0) {	//Optimization
		//mov r.gpr[rD], SIMM << 16
		ADD_BYTE(0xC7);
		ADD_BYTE(0x05);
		ADD_DWORD(&r.gpr[rD]);
		ADD_DWORD(int(SIMM) << 16);
	} else {
		if(rD == rA) {
			//add r.gpr[rD], SIMM << 16
			AB(0x81);
			AB(0x05);
			AD(&r.gpr[rD]);
			AD(int(SIMM) << 16);
		} else {
			//mov eax, r.gpr[rA]
			ADD_BYTE(0xA1);
			ADD_DWORD(&r.gpr[rA]);
			//add eax, SIMM << 16
			ADD_BYTE(0x05);
			ADD_DWORD(int(SIMM) << 16);
			//mov r.gpr[rD], eax
			ADD_BYTE(0xA3);
			ADD_DWORD(&r.gpr[rD]);
		}
	}
}

// Recompiles PowerPC `ori`: r.gpr[rA] = r.gpr[rS] | UIMM.
void Recompiler::_ori() {
	D_rS;
	D_rA;
	D_UIMM;

	if(rA != rS) {
		// Distinct registers: load the source, OR in the immediate, store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x0D); ADD_DWORD(UIMM);	//or eax, UIMM
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
		return;
	}

	// Source and destination coincide: OR the immediate into memory directly.
	AB(0x81); AB(0x0D); AD(&r.gpr[rA]); AD(UIMM);	//or r.gpr[rA], UIMM
}

// Recompiles PowerPC `or`: r.gpr[rA] = r.gpr[rS] | r.gpr[rB].
// With Rc set, add_setcr0() is emitted after the operation.
void Recompiler::_or() {
	D_rS; D_rA; D_rB; D_Rc;

	const bool dest_is_operand = (rA == rS) || (rA == rB);
	if(!dest_is_operand) {
		// Three distinct registers: compute in eax and store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x0B); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//or eax, r.gpr[rB]
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
	} else {
		// The destination is one of the operands: load the other operand and
		// OR it into the destination in place.
		// (rA == rS == rB without Rc would be a NOP; not special-cased.)
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA == rS ? rB : rS]);	//mov eax, <other operand>
		AB(0x09); AB(0x05); AD(&r.gpr[rA]);	//or r.gpr[rA], eax
	}

	if(Rc) {
		add_setcr0();
	}
}

// Recompiles PowerPC `mtspr`: moves r.gpr[rS] into the special-purpose
// register selected by spr.
//
// Throws bad_form_exception(Invalid) when r.mtspr_is_valid(spr) is false.
// When mtspr_requires_processing(spr) is true, the emitted code calls
// Recompiler::mtspr_with_processing(opcode) at run time; otherwise it copies
// the GPR straight to the address returned by r.getmtspr(spr).
void Recompiler::_mtspr() {
	D_rS; D_spr;
	if(!r.mtspr_is_valid(spr))
		throw bad_form_exception(bad_form_exception::Invalid);

	if(mtspr_requires_processing(spr)) {
		//mtspr_with_processing(opcode);
		//push edi	//edi is saved around the call and restored below
		AB(0x57);

		//push opcode	//this will be popped by the callee
		AB(0x68);
		AD(opcode);
		//mov ecx, this //the this pointer
		AB(0xB9);
		AD(this);
		//call mtspr_with_processing  //thiscall assumed
		AB(0xE8);
		// The member-function pointer is reinterpreted as a DWORD address and
		// handed to REQUEST_CALL, which supplies the operand of the call.
		void (Recompiler::*temp)(DWORD) = &Recompiler::mtspr_with_processing;
		REQUEST_CALL(MAKE(DWORD, temp));

		//pop edi
		AB(0x5F);
	} else {
		//*r.getmtspr(spr) = r.gpr[rS];
		//mov eax, r.gpr[rS]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rS]);
		//mov r.getmtspr(spr), eax
		ADD_BYTE(0xA3);
		ADD_DWORD(r.getmtspr(spr));
	}
}

// Recompiles PowerPC `stw`: m.ww(rA_0 + d, r.gpr[rS]) - store a 32-bit word.
//
// Two paths are emitted: a call to Hardware::ww (skipped entirely when
// rA == 0) and a direct store under the "no_hardware" label that byte-swaps
// the value before writing it through the pointer in eax.
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// the former computes the address into eax and branches to "no_hardware" for
// plain memory, and the latter jumps to "end" - confirm.
void Recompiler::_stw() {
	D_rS; D_rA; D_d16;

	//m.ww(rA_0 + d, r.gpr[rS]);
	add_mem1_rA0d(rA, d, true);
	if(rA != 0) {	//unnecessary optimization? rA should almost always be != 0. make some statistics!
		//push r.gpr[rS]
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ebx, r.gpr[rS]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//bswap ebx	//convert to the guest's byte order
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

// Recompiles PowerPC `addi`: r.gpr[rD] = rA_0 + SIMM, where rA_0 reads as 0
// when rA == 0.
void Recompiler::_addi() {
	D_rD;
	D_rA;
	D_SIMM;

	if(rA == 0) {
		// rA_0 is zero, so the result is just the immediate.
		ADD_BYTE(0xC7); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rD]); ADD_DWORD(SIMM);	//mov r.gpr[rD], SIMM
		return;
	}

	if(rD == rA) {
		// In-place update of the destination register.
		AB(0x81); AB(0x05); AD(&r.gpr[rD]); AD(SIMM);	//add r.gpr[rD], SIMM
		return;
	}

	// General case: go through eax.
	ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
	ADD_BYTE(0x05); ADD_DWORD(SIMM);	//add eax, SIMM
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
}

// Recompiles PowerPC `bc` (conditional branch to a statically known target).
//
// The target is BD when AA is set, otherwise cia + BD. Outside advanced mode
// a target whose PHYSICALIZE()d address is >= MAIN_MEMORY_SIZE aborts with
// rec_stop_exception("Invalid jump address").
// BO == 0x14 is treated as an unconditional branch: no condition test is
// emitted and the instruction is recorded as IT_BRANCH_UNCONDITIONAL.
void Recompiler::_bc() {
	D_BO; D_BI; D_BD; D_AA; D_LK;
	DWORD target_address = AA ? BD : cia + BD;
	if(!g::advanced_mode && PHYSICALIZE(target_address) >= MAIN_MEMORY_SIZE)
		throw rec_stop_exception("Invalid jump address");

	if(BO != 0x14) {
		// The emitted code below compares al with 0x03 after this helper;
		// presumably al carries the ctr_ok/cond_ok bits - confirm.
		add_bc_ctr_cond(BO, BI);

		//if(ctr_ok && cond_ok) {
		//cmp al, 0x03
		ADD_BYTE(0x3C);
		ADD_BYTE(0x03);
		//jnz nojump
		ADD_BYTE(0x75);
		request_label("nojump");
	}
	//}
	add_static_branch(target_address, LK);
	// The label is added even for the unconditional form, where nothing
	// requests it.
	add_label("nojump");

	last_instruction_type = (BO == 0x14) ? IT_BRANCH_UNCONDITIONAL : IT_BRANCH_CONDITIONAL;
}

void Recompiler::_b() {
	D_LI; D_AA; D_LK;
	DWORD target_address = AA ? LI : cia + LI;
	if(!g::advanced_mode && PHYSICALIZE(target_address) >= MAIN_MEMORY_SIZE)
		throw rec_stop_exception("Invalid jump address");

	add_static_branch(target_address, LK);
	last_instruction_type = IT_BRANCH_UNCONDITIONAL;
}

// Recompiles PowerPC `add`: r.gpr[rD] = r.gpr[rA] + r.gpr[rB].
// With OE set add_setoverflow() is emitted, with Rc set add_setcr0(), in
// that order.
void Recompiler::_add() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	const bool dest_is_operand = (rD == rA) || (rD == rB);
	if(!dest_is_operand) {
		// Three distinct registers: compute in eax and store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
		ADD_BYTE(0x03); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//add eax, r.gpr[rB]
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
	} else {
		// The destination is one of the addends: load the other one and add
		// it into the destination in place.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rD == rA ? rB : rA]);	//mov eax, <other addend>
		AB(0x01); AB(0x05); AD(&r.gpr[rD]);	//add r.gpr[rD], eax
	}

	if(OE) {
		add_setoverflow();
	}
	if(Rc) {
		add_setcr0();
	}
}

// Recompiles PowerPC `lwz`: r.gpr[rD] = m.rw(rA_0 + d) - load a 32-bit word.
//
// Two paths are emitted: a call to Hardware::rw (skipped when rA == 0) and a
// direct load under "no_hardware" that reads through eax and byte-swaps the
// result. Both paths leave the value in eax, which is stored after "end".
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_lwz() {
	D_rD; D_rA; D_d16;

	//r.gpr[rD] = m.rw(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax	//convert from the guest's byte order
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

// Recompiles PowerPC `cmpli`: unsigned compare of r.gpr[rA] with UIMM, with
// the result placed in CR field crfD.
// Throws bad_form_exception(Invalid) when the L bit is set.
void Recompiler::_cmpli() {
	D_crfD;
	D_L;
	D_rA;
	D_UIMM;

	if(L) {
		throw bad_form_exception(bad_form_exception::Invalid);
	}

	AB(0x81); AB(0x3D); AD(&r.gpr[rA]); AD(UIMM);	//cmp r.gpr[rA], UIMM

	// Turn the host flags into a CR nibble in al, then write it to crfD.
	add_makecr2al_unsigned();
	add_setal2cr(crfD);
}

// Recompiles PowerPC `subf`: r.gpr[rD] = r.gpr[rB] - r.gpr[rA].
// With OE set add_setoverflow() is emitted, with Rc set add_setcr0(), in
// that order.
void Recompiler::_subf() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	if(rD != rB) {
		// General case: compute rB - rA in eax and store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
		ADD_BYTE(0x2B); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rA]);	//sub eax, r.gpr[rA]
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
	} else {
		// The destination is the minuend, so subtract in place. Unlike add,
		// operand order matters, so only rD == rB can take this shortcut.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
		AB(0x29); AB(0x05); AD(&r.gpr[rD]);	//sub r.gpr[rD], eax
	}

	if(OE) {
		add_setoverflow();
	}
	if(Rc) {
		add_setcr0();
	}
}

// Recompiles PowerPC `cmp`: compares r.gpr[rA] with r.gpr[rB] and places the
// result in CR field crfD (via add_makecr2al_cmp()).
// Throws bad_form_exception(Invalid) when the L bit is set.
void Recompiler::_cmp() {
	D_crfD;
	D_L;
	D_rA;
	D_rB;

	if(L) {
		throw bad_form_exception(bad_form_exception::Invalid);
	}

	AB(0xA1); AD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
	AB(0x39); AB(0x05); AD(&r.gpr[rA]);	//cmp r.gpr[rA], eax

	// Turn the host flags into a CR nibble in al, then write it to crfD.
	add_makecr2al_cmp();
	add_setal2cr(crfD);
}

// Recompiles PowerPC `mfspr`: moves the special-purpose register selected by
// spr into r.gpr[rD].
//
// Throws bad_form_exception(Invalid) when r.mfspr_is_valid(spr) is false.
// When mfspr_requires_processing(spr) is true, the emitted code calls
// Recompiler::mfspr_with_processing(opcode) at run time; otherwise it copies
// the value at the address returned by r.getmfspr(spr) to the GPR.
void Recompiler::_mfspr() {
	D_rD; D_spr;
	if(!r.mfspr_is_valid(spr))
		throw bad_form_exception(bad_form_exception::Invalid);

	if(mfspr_requires_processing(spr)) {
		//mfspr_with_processing(opcode);
		//push edi	//edi is saved around the call and restored below
		AB(0x57);

		//push opcode	//this will be popped by the callee
		AB(0x68);
		AD(opcode);
		//mov ecx, this //the this pointer
		AB(0xB9);
		AD(this);
		//call mfspr_with_processing  //thiscall assumed
		AB(0xE8);
		// The member-function pointer is reinterpreted as a DWORD address and
		// handed to REQUEST_CALL, which supplies the operand of the call.
		void (Recompiler::*temp)(DWORD) = &Recompiler::mfspr_with_processing;
		REQUEST_CALL(MAKE(DWORD, temp));

		//pop edi
		AB(0x5F);
	} else {
		//r.gpr[rD] = *r.getmfspr(spr);
		//mov eax, r.getmfspr(spr)
		ADD_BYTE(0xA1);
		ADD_DWORD(r.getmfspr(spr));
		//mov r.gpr[rD], eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.gpr[rD]);
	}
}

// Recompiles PowerPC `rlwinm`:
//   r.gpr[rA] = _rotl(r.gpr[rS], SH) & makemaskw(MB, ME)
// The rotate is omitted when SH == 0 and the AND when the mask is all ones.
// With Rc set, add_setcr0() is emitted; if no AND was emitted, a
// `test eax, eax` precedes it so the host flags reflect the result.
void Recompiler::_rlwinm() {
	D_rS; D_rA; D_SH; D_MB; D_ME; D_Rc;

	AB(0xA1); AD(&r.gpr[rS]);	//mov eax, r.gpr[rS]

	if(SH != 0) {
		AB(0xC1); AB(0xC0); AB((BYTE)SH);	//rol eax, SH
	}

	// The AND doubles as the flag-setting instruction when it is emitted.
	const bool mask_applied = (makemaskw(MB, ME) != 0xFFFFFFFF);
	if(mask_applied) {
		AB(0x25); AD(makemaskw(MB, ME));	//and eax, makemaskw(MB, ME)
	}

	AB(0xA3); AD(&r.gpr[rA]);	//mov r.gpr[rA], eax

	if(!Rc)
		return;

	if(!mask_applied) {
		AB(0x85); AB(0xC0);	//test eax, eax
	}
	add_setcr0();
}

// Recompiles PowerPC `andi.`: r.gpr[rA] = r.gpr[rS] & UIMM.
// add_setcr0() is always emitted afterwards (this opcode has no Rc field
// decoded here).
void Recompiler::_andi() {
	D_rS;
	D_rA;
	D_UIMM;

	if(rA != rS) {
		// Distinct registers: load, mask in eax, store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x25); ADD_DWORD(UIMM);	//and eax, UIMM
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
	} else {
		// Same register: mask it in place.
		AB(0x81); AB(0x25); AD(&r.gpr[rA]); AD(UIMM);	//and r.gpr[rA], UIMM
	}

	//r.setcr0(r.gpr[rA]);
	add_setcr0();
}

// Recompiles PowerPC `oris`: r.gpr[rA] = r.gpr[rS] | (DWORD(UIMM) << 16).
void Recompiler::_oris() {
	D_rS;
	D_rA;
	D_UIMM;

	if(rA != rS) {
		// Distinct registers: load the source, OR in the immediate, store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x0D); ADD_DWORD(DWORD(UIMM) << 16);	//or eax, UIMM << 16
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
		return;
	}

	// Source and destination coincide: OR the immediate into memory directly.
	AB(0x81); AB(0x0D); AD(&r.gpr[rA]); AD(DWORD(UIMM) << 16);	//or r.gpr[rA], UIMM << 16
}

// Recompiles PowerPC `sync`: emits no host code.
void Recompiler::_sync() {
}

// Recompiles PowerPC `stwu`: m.ww(r.gpr[rA] + d, r.gpr[rS]) followed by
// r.gpr[rA] += d (store word with update).
//
// Throws bad_form_exception(Invalid) when rA == 0. Unlike stw, the
// Hardware::ww call path is therefore always emitted. The base-register
// update is emitted after the "end" label, so it follows both store paths.
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_stwu() {
	D_rS; D_rA; D_d16;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.ww(r.gpr[rA] + d, r.gpr[rS]);
	add_mem1_rA0d(rA, d, true);
	{
		//push r.gpr[rS]
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ebx, r.gpr[rS]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//bswap ebx	//convert to the guest's byte order
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

// Recompiles PowerPC `cmpi`: compares r.gpr[rA] with SIMM and places the
// result in CR field crfD (via add_makecr2al_cmp()).
// Throws bad_form_exception(Invalid) when the L bit is set.
void Recompiler::_cmpi() {
	D_crfD;
	D_L;
	D_rA;
	D_SIMM;

	if(L) {
		throw bad_form_exception(bad_form_exception::Invalid);
	}

	AB(0x81); AB(0x3D); AD(&r.gpr[rA]); AD(SIMM);	//cmp r.gpr[rA], SIMM

	// Turn the host flags into a CR nibble in al, then write it to crfD.
	add_makecr2al_cmp();
	add_setal2cr(crfD);
}

// Recompiles PowerPC `bclr` (branch conditional to link register).
//
// No actual branch is emitted here: on the taken path the generated code
// loads cia into eax and runs add_fatal_error(REC_BCLR); per the note on the
// decode line, LK is left to the error handler. For BO != 0x14 a condition
// test is emitted first and the not-taken path skips to "nojump".
// An unconditional bclr (BO == 0x14) is recorded as IT_ERROR.
void Recompiler::_bclr() {
	D_BO; D_BI; //D_LK; //handled by error handler

	if(BO != 0x14) {
		add_bc_ctr_cond(BO, BI);

		//if(ctr_ok && cond_ok) {
		//cmp al, 0x03
		ADD_BYTE(0x3C);
		ADD_BYTE(0x03);
		//jnz nojump
		ADD_BYTE(0x75);
		request_label("nojump");
	}
	//mov eax, cia
	AB(0xB8);
	AD(cia);
	add_fatal_error(REC_BCLR);
	add_label("nojump");
	last_instruction_type = (BO == 0x14) ? IT_ERROR : IT_BRANCH_CONDITIONAL;
}

// Recompiles PowerPC `lhz`: r.gpr[rD] = m.rh(rA_0 + d) - load a halfword and
// zero-extend it.
//
// Two paths are emitted: a call to Hardware::rh (skipped when rA == 0),
// whose 16-bit result is explicitly zero-extended, and a direct load under
// "no_hardware" that swaps the two bytes. Both leave the value in eax, which
// is stored after "end".
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_lhz() {
	D_rD; D_rA; D_d16;

	//r.gpr[rD] = m.rh(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rh	//thiscall assumed
		AB(0xE8);
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, ax //the upper bytes of eax cannot be assumed to be zero after the call
		AB(0x0F);
		AB(0xB7);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, word [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB7);
		ADD_BYTE(0x00);
		//xchg al, ah	//swap the two bytes of the halfword
		ADD_BYTE(0x86);
		ADD_BYTE(0xC4);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

// Recompiles PowerPC `stb`: m.wb(rA_0 + d, (BYTE)r.gpr[rS]) - store a byte.
//
// Two paths are emitted: a call to Hardware::wb (skipped when rA == 0) and a
// direct byte store under "no_hardware" through the pointer in eax.
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_stb() {
	D_rS; D_rA; D_d16;

	//m.wb(rA_0 + d, (BYTE)r.gpr[rS]);
	add_mem1_rA0d(rA, d, true);
	if(rA != 0) {
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wb	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, BYTE) = &Hardware::wb;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov bl, r.gpr[rS] //this should load the least-significant byte
		ADD_BYTE(0x8A);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//mov [eax], bl
		ADD_BYTE(0x88);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

// Recompiles PowerPC `lbz`: r.gpr[rD] = m.rb(rA_0 + d) - load a byte and
// zero-extend it.
//
// Two paths are emitted: a call to Hardware::rb (skipped when rA == 0),
// whose 8-bit result is explicitly zero-extended, and a direct zero-extending
// load under "no_hardware". Both leave the value in eax, which is stored
// after "end".
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_lbz() {
	D_rD; D_rA; D_d16;

	//r.gpr[rD] = m.rb(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rb	//thiscall assumed
		AB(0xE8);
		BYTE (Hardware::*temp)(WORD) = &Hardware::rb;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, al //the upper bytes of eax cannot be assumed to be zero after the call
		AB(0x0F);
		AB(0xB6);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, byte [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB6);
		ADD_BYTE(0x00);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

// Recompiles PowerPC `mulli`: r.gpr[rD] = low 32 bits of the signed product
// r.gpr[rA] * SIMM.
void Recompiler::_mulli() {
	D_rD;
	D_rA;
	D_SIMM;

	//r.gpr[rD] = DWORD(__int64((int)r.gpr[rA]) * __int64(SIMM));
	AB(0x69); AB(0x05); AD(&r.gpr[rA]); AD(SIMM);	//imul eax, r.gpr[rA], SIMM
	AB(0xA3); AD(&r.gpr[rD]);	//mov r.gpr[rD], eax
}

// Recompiles PowerPC `extsb`: r.gpr[rA] = (int)(signed char)r.gpr[rS].
// With Rc set, the result is tested and add_setcr0() is emitted.
void Recompiler::_extsb() {
	D_rS;
	D_rA;
	D_Rc;

	AB(0x0F); AB(0xBE); AB(0x05); AD(&r.gpr[rS]);	//movsx eax, byte r.gpr[rS]
	AB(0xA3); AD(&r.gpr[rA]);	//mov r.gpr[rA], eax

	if(!Rc)
		return;

	// Neither movsx nor mov updates eflags, so set them explicitly.
	AB(0x85); AB(0xC0);	//test eax, eax
	add_setcr0();
}

// Recompiles PowerPC `isync`: emits no host code.
void Recompiler::_isync() {
}

// Recompiles PowerPC `crxor`: CR bit crbD = CR bit crbA XOR CR bit crbB.
//
// r.cr is loaded into eax, the destination bit is set or cleared there, and
// eax is written back. PowerPC bit n corresponds to x86 bit 31-n.
// When crbA == crbB the XOR is always 0, so the bit is simply cleared.
// Otherwise the emitted code branches on the two source bits:
//   crbA=0,crbB=0 -> clear    crbA=0,crbB=1 -> set
//   crbA=1,crbB=0 -> set      crbA=1,crbB=1 -> clear
void Recompiler::_crxor() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), getbit(r.cr, crbA) != getbit(r.cr, crbB));
	//mov eax, r.cr
	AB(0xA1);
	AD(&r.cr);
	if(crbA == crbB) {  //Optimization. further opts for crbD == crbA and ==crbB is possible.
		//btr eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xF0);
		AB((BYTE)31 - crbD);
	} else {
		//bt eax, 31-crbA
		AB(0x0F);
		AB(0xBA);
		AB(0xE0);
		AB((BYTE)31 - crbA);
		//jc crbA == 1
		AB(0x72);
		request_label("crbA == 1");
		{ //crbA == 0:
			//bt eax, 31-crbB
			AB(0x0F);
			AB(0xBA);
			AB(0xE0);
			AB((BYTE)31 - crbB);
			//jnc clear
			AB(0x73);
			request_label("clear");
			//jmp set
			AB(0xEB);
			request_label("set");
		}
		{
			add_label("crbA == 1");
			//bt eax, 31-crbB
			AB(0x0F);
			AB(0xBA);
			AB(0xE0);
			AB((BYTE)31 - crbB);
			//jc clear
			AB(0x72);
			request_label("clear");
			// crbA=1, crbB=0: falls through into "set" below.
		}
		add_label("set");
		//bts eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xE8);
		AB((BYTE)31 - crbD);
		//jmp store
		AB(0xEB);
		request_label("store");

		add_label("clear");
		//btr eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xF0);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	//mov r.cr, eax
	AB(0xA3);
	AD(&r.cr);
}

// Recompiles PowerPC `mulhw`: r.gpr[rD] = high 32 bits of the signed 64-bit
// product r.gpr[rA] * r.gpr[rB].
// With Rc set, the result is tested and add_setcr0() is emitted.
void Recompiler::_mulhw() {
	D_rD;
	D_rA;
	D_rB;
	D_Rc;

	//r.gpr[rD] = DWORD((__int64((int)r.gpr[rA]) * __int64((int)r.gpr[rB])) >> 32);
	AB(0xA1); AD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
	AB(0xF7); AB(0x2D); AD(&r.gpr[rB]);	//imul r.gpr[rB]  -> edx:eax = eax * r.gpr[rB]
	AB(0x89); AB(0x15); AD(&r.gpr[rD]);	//mov r.gpr[rD], edx

	if(!Rc)
		return;

	// The flags must describe the stored high word, so test edx explicitly.
	AB(0x85); AB(0xD2);	//test edx, edx
	add_setcr0();
}

// Recompiles PowerPC `srawi`: r.gpr[rA] = r.gpr[rS] arithmetically shifted
// right by SH, and updates XER[CA].
//
// Per the original formula below, CA is set when the source is negative, any
// of the SH bits shifted out is 1, and SH != 0; otherwise it is cleared.
// CA is cleared by masking bit 0x20 out of the byte at &r.xer + 3.
// With Rc set, add_setcr0() is emitted after the result is stored.
void Recompiler::_srawi() {
	D_rS; D_rA; D_SH; D_Rc;

	//ANY 1 bits, people!!!
	//setflags(r.xer, XER_CA, signw(r.gpr[rS]) &&
	//  getbitsw(r.gpr[rS], 32-SH, 31) != 0 && SH != 0);
	//mov eax, r.gpr[rS]
	AB(0xA1);
	AD(&r.gpr[rS]);

	if(SH == 0) {
		// No shift: CA is always cleared and the value is copied unchanged.
		//clear_xerca
		//and byte [&r.xer], ~0x20
		AB(0x80);
		AB(0x25);
		AD(DWORD(&r.xer) + 3);
		AB(~0x20);
		//mov r.gpr[rA], eax
		AB(0xA3);
		AD(&r.gpr[rA]);
		if(Rc) {
			//test eax, eax
			AB(0x85);
			AB(0xC0);

			add_setcr0();
		}
	} else {
		//bt eax, 31-0	//test the sign bit (PowerPC bit 0)
		AB(0x0F);
		AB(0xBA);
		AB(0xE0);
		AB(31-0);
		//jnc clear_xerca
		AB(0x73);
		request_label("clear_xerca");
		{ //set_xerca:
			//test eax, bitmask(32-SH, 31)	//are any of the bits to be shifted out set?
			AB(0xA9);
			AD(makemaskw(32-SH, 31));

			add_setxerca_by_nzf();  //remove this for optimization

			//jmp shift
			AB(0xEB);
			request_label("shift");
		}
		{
			add_label("clear_xerca");
			//and byte [&r.xer], ~0x20
			AB(0x80);
			AB(0x25);
			AD(DWORD(&r.xer) + 3);
			AB(~0x20);
		}
		add_label("shift");
		//sar eax, SH //does not clear CF if SH == 0  //no matter now
		AB(0xC1);
		AB(0xF8);
		AB(SH);
		//mov r.gpr[rA], eax
		AB(0xA3);
		AD(&r.gpr[rA]);
		// No explicit test is emitted here: the flags set by sar (SH != 0)
		// are still current, since mov does not change them.
		if(Rc)
			add_setcr0();
	}
}

// Recompiles PowerPC `dcbf`: currently emits no host code.
// The entire implementation below (copying a 32-byte block from cache_memory
// back to memory when the effective address lies inside the cache range) is
// commented out, including the operand decoding.
void Recompiler::_dcbf() {
	//D_rA; D_rB;

	//DWORD EA = rA_0 + r.gpr[rB];
	/*if(g::cache_enabled) {
	//if(EA >= CACHE_BASE && EA < CACHE_BASE + CACHE_SIZE)
	if(rA == 0) {
	//mov eax, r.gpr[rB]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rB]);
	} else {
	//mov eax, r.gpr[rA]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rA]);
	//add eax, r.gpr[rB]
	ADD_BYTE(0x03);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	}
	//cmp eax, CACHE_BASE
	AB(0x3D);
	AD(CACHE_BASE);
	//jb cache_miss
	AB(0x72);
	request_label("cache_miss");
	//cmp eax, CACHE_BASE + CACHE_SIZE
	AB(0x3D);
	AD(CACHE_BASE + CACHE_SIZE);
	//jnb cache_miss
	AB(0x73);
	request_label("cache_miss");
	{
	add_label("cache_hit");
	//EA &= CACHE_BLOCK_MASK;
	//m.read_cached(EA, 32, m.getp_physical(PHYSICALIZE(EA), 32));
	//push edi
	AB(0x57);
	//and eax, ~0xC000001F
	AB(0x25);
	AD(~0xC000001F);
	//add eax, memory
	AB(0x05);
	AD(memory);
	//mov edi, eax
	AB(0x89);
	AB(0xC7);
	//add eax, cache_memory - memory
	AB(0x05);
	AD(cache_memory - memory);
	//mov esi, eax
	AB(0x89);
	AB(0xC6);
	//xor ecx, ecx
	AB(0x31);
	AB(0xC9);
	//mov cl, 32
	AB(0xB1);
	AB(32);
	//rep movsb
	AB(0xF3);
	AB(0xA4);
	//pop edi
	AB(0x5F);
	}
	add_label("cache_miss");
	}*/
}

// Recompiles PowerPC `lfd`: r.fpr[frD].dword = m.rd(rA_0 + d) - load a
// 64-bit floating-point register image.
//
// Two paths are emitted: a call to Hardware::rd (skipped when rA == 0), whose
// QWORD result is taken from edx:eax, and a direct load under "no_hardware"
// that byte-swaps each 32-bit half and exchanges the halves. Each path
// stores into r.fpr[frD] itself; nothing is emitted after "end".
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_lfd() {
	D_frD; D_rA; D_d16;

	//r.fpr[frD].dword = m.rd(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rd	//thiscall assumed
		AB(0xE8);
		QWORD (Hardware::*temp)(WORD) = &Hardware::rd;
		REQUEST_CALL(MAKE(DWORD, temp));
		//mov r.fpr[frD].loword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].loword);
		//mov r.fpr[frD].hiword, edx
		ADD_BYTE(0x89);
		AB(0x15);
		ADD_DWORD(&r.fpr[frD].hiword);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//r.fpr[frD].dword = swapd([eax]);
		//mov ebx, eax	//keep the pointer in ebx; eax is reused for the data
		AB(0x8B);
		AB(0xD8);

		// The first word in memory becomes the high word of the register.
		//mov eax, [ebx]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x03);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//mov r.fpr[frD].hiword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].hiword);

		// The second word in memory becomes the low word.
		//mov eax, [ebx + 4]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x43);
		AB(4);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//mov r.fpr[frD].loword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].loword);
	}
	add_label("end");

}

// Recompiles PowerPC `xoris`: r.gpr[rA] = r.gpr[rS] ^ (DWORD(UIMM) << 16).
void Recompiler::_xoris() {
	D_rS;
	D_rA;
	D_UIMM;

	if(rA != rS) {
		// Distinct registers: load the source, XOR in the immediate, store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x35); ADD_DWORD(DWORD(UIMM) << 16);	//xor eax, UIMM << 16
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
		return;
	}

	// Source and destination coincide: XOR the immediate into memory directly.
	AB(0x81); AB(0x35); AD(&r.gpr[rA]); AD(DWORD(UIMM) << 16);	//xor r.gpr[rA], UIMM << 16
}

// Recompiles PowerPC `xori`: r.gpr[rA] = r.gpr[rS] ^ UIMM.
void Recompiler::_xori() {
	D_rS;
	D_rA;
	D_UIMM;

	if(rA != rS) {
		// Distinct registers: load the source, XOR in the immediate, store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x35); ADD_DWORD(UIMM);	//xor eax, UIMM
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
		return;
	}

	// Source and destination coincide: XOR the immediate into memory directly.
	AB(0x81); AB(0x35); AD(&r.gpr[rA]); AD(UIMM);	//xor r.gpr[rA], UIMM
}

// Recompiles PowerPC `extsh`: r.gpr[rA] = (int)(signed short)r.gpr[rS].
// With Rc set, the result is tested and add_setcr0() is emitted.
void Recompiler::_extsh() {
	D_rS;
	D_rA;
	D_Rc;

	AB(0x0F); AB(0xBF); AB(0x05); AD(&r.gpr[rS]);	//movsx eax, word r.gpr[rS]
	AB(0xA3); AD(&r.gpr[rA]);	//mov r.gpr[rA], eax

	if(!Rc)
		return;

	// Neither movsx nor mov updates eflags, so set them explicitly.
	AB(0x85); AB(0xC0);	//test eax, eax
	add_setcr0();
}

// Recompiles PowerPC `subfic`: r.gpr[rD] = ~r.gpr[rA] + SIMM + 1, with the
// carry-out recorded through add_setxerca_by_cf().
// The "+ 1" is supplied by setting the host carry flag before the adc.
// Possible future optimizations: rD == rA, and an 8-bit immediate form when
// -128 <= SIMM <= 127.
void Recompiler::_subfic() {
	D_rD;
	D_rA;
	D_SIMM;

	AB(0xF9);	//stc
	AB(0xA1); AD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
	AB(0xF7); AB(0xD0);	//not eax  (leaves CF untouched)
	AB(0x15); AD(SIMM);	//adc eax, SIMM
	AB(0xA3); AD(&r.gpr[rD]);	//mov r.gpr[rD], eax

	add_setxerca_by_cf();
}

// Recompiles PowerPC `lwzx`: r.gpr[rD] = m.rw(rA_0 + r.gpr[rB]) - indexed
// 32-bit load.
//
// Two paths are emitted: a call to Hardware::rw and a direct load under
// "no_hardware" that reads through eax and byte-swaps the result. Both leave
// the value in eax, which is stored after "end".
// NOTE(review): add_mem1_x()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_lwzx() {
	D_rD; D_rA; D_rB;

	//r.gpr[rD] = m.rw(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax	//convert from the guest's byte order
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

// Recompiles PowerPC `stwx`: m.ww(rA_0 + r.gpr[rB], r.gpr[rS]) - indexed
// 32-bit store.
//
// Two paths are emitted: a call to Hardware::ww and a direct store under
// "no_hardware" that byte-swaps the value before writing it through eax.
// NOTE(review): add_mem1_x()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_stwx() {
	D_rS; D_rA; D_rB;

	//m.ww(rA_0 + r.gpr[rB], r.gpr[rS]);
	add_mem1_x(rA, rB, true);
	{
		//push r.gpr[rS]
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ebx, r.gpr[rS]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//bswap ebx	//convert to the guest's byte order
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

// Recompiles PowerPC `sth`: m.wh(rA_0 + d, (WORD)r.gpr[rS]) - store a
// halfword.
//
// Two paths are emitted: a call to Hardware::wh (skipped when rA == 0) and a
// direct 16-bit store under "no_hardware" that swaps the two bytes first.
// NOTE(review): add_mem1_rA0d()/add_mem2() are defined elsewhere; presumably
// they select between the two paths and jump to "end" - confirm.
void Recompiler::_sth() {
	D_rS; D_rA; D_d16;

	//m.wh(rA_0 + d, (WORD)r.gpr[rS]);
	add_mem1_rA0d(rA, d, true);
	if(rA != 0) {
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed //may not be entirely safe, but it should
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wh	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, WORD) = &Hardware::wh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx ebx, (WORD)r.gpr[rS]
		AB(0x0F);
		AB(0xB7);
		AB(0x1D);
		AD(&r.gpr[rS]);
		//xchg bl, bh	//swap the two bytes of the halfword
		ADD_BYTE(0x86);
		ADD_BYTE(0xDF);
		//mov [eax], bx	//0x66 prefix selects the 16-bit operand size
		ADD_BYTE(0x66);
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

// Recompiles PowerPC `and`: r.gpr[rA] = r.gpr[rS] & r.gpr[rB].
// With Rc set, add_setcr0() is emitted after the operation.
void Recompiler::_and() {
	D_rS; D_rA; D_rB; D_Rc;

	const bool dest_is_operand = (rA == rS) || (rA == rB);
	if(!dest_is_operand) {
		// Three distinct registers: compute in eax and store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x23); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//and eax, r.gpr[rB]
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
	} else {
		// The destination is one of the operands: load the other operand and
		// AND it into the destination in place.
		// (rA == rS == rB without Rc would be a NOP; not special-cased.)
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA == rS ? rB : rS]);	//mov eax, <other operand>
		AB(0x21); AB(0x05); AD(&r.gpr[rA]);	//and r.gpr[rA], eax
	}

	if(Rc) {
		add_setcr0();
	}
}

// Recompiles PowerPC `addze`: r.gpr[rD] = r.gpr[rA] + XER[CA].
//
// XER[CA] (PowerPC bit 2, i.e. x86 bit 29 of r.xer) is copied into the host
// carry flag with `bt`, then added with `adc ..., 0`. With OE set
// add_setoverflow() is emitted, with Rc set add_setcr0(), and finally
// add_setxerca_by_cf() records the new carry.
// NOTE(review): add_setxerca_by_cf() is emitted AFTER the OE/Rc helpers, so
// it relies on those helpers leaving CF from the adc intact - confirm
// against their definitions.
void Recompiler::_addze() {
	D_rD; D_rA; D_OE; D_Rc;

	//r.gpr[rD] = r.gpr[rA] + XER_CA;
	//bt r.xer, 31-2  //is bit 2 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31-2);
	if(rD == rA) {
		//adc r.gpr[rD], 0	//using CF from bt
		AB(0x83);
		AB(0x15);
		AD(&r.gpr[rD]);
		AB(0);
	} else {
		//mov eax, r.gpr[rA]	//mov does not disturb CF
		AB(0xA1);
		AD(&r.gpr[rA]);
		//adc eax, 0  //using CF from bt
		AB(0x83);
		AB(0xD0);
		AB(0);
		//mov r.gpr[rD], eax
		AB(0xA3);
		AD(&r.gpr[rD]);
	}

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

// Recompiles PowerPC `fsub`: frD = frA - frB (double precision), updating
// FPSCR[FPRF] through add_set_fpscr_fprf().
// Throws bad_form_exception(Unemulated) when Rc is set.
// NOTE(review): SET_DISP8, FLD_PS_D_DISP8, FSUB_PS_D_DISP8 and FSTPD_DISP8
// are macros defined elsewhere; from their names and the other DISP8 users
// in this file they presumably emit ebx-relative x87 load/subtract/store
// sequences, with FSTPD_DISP8 storing to frD - confirm.
void Recompiler::_fsub() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(FPR_PS_D(frA) - FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FSUB_PS_D_DISP8(frB);
	// FPRF is derived while the result is still on the x87 stack.
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

// Recompiles PowerPC `frsp`: r.fpr[frD].d = (float)r.fpr[frB].d - round to
// single precision.
//
// The value is loaded onto the x87 stack and stored back as a double between
// add_set_fpcw_precision_float() and add_set_fpcw_precision_extd().
// Throws bad_form_exception(Unemulated) when Rc is set.
// NOTE(review): the two fpcw helpers are defined elsewhere; presumably they
// switch the x87 precision-control field so the load/store pair performs the
// rounding - confirm.
void Recompiler::_frsp() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//r.fpr[frD].d = (float)r.fpr[frB].d;
	add_set_fpcw_precision_float();

	//fld r.fpr[frB].d
	AB(0xDD);
	AB(0x05);
	AD(&r.fpr[frB].d);

	add_set_fpscr_fprf(); //We need to do this before the stack is popped and we lose the data

	//fstp r.fpr[frD].d
	AB(0xDD);
	AB(0x1D);
	AD(&r.fpr[frD].d);

	add_set_fpcw_precision_extd();
}

// Recompiles PowerPC `lfs`: loads a 32-bit float from rA_0 + d into frD.
//
// The raw word is fetched into eax either by a call to Hardware::rw (skipped
// when rA == 0) or by a direct byte-swapped load under "no_hardware". After
// "end" it is spilled to [esp - 4], loaded onto the x87 stack as a 32-bit
// real, and handed to the SET_FPR_SINGLE macro.
// NOTE(review): the spill slot lies below the stack pointer, so it is only
// valid until something else uses the stack - the fld follows immediately.
// NOTE(review): add_mem1_rA0d()/add_mem2() and SET_FPR_SINGLE are defined
// elsewhere; per the original comment below SET_FPR_SINGLE presumably stores
// st(0) into both PS0 and PS1 of frD - confirm.
void Recompiler::_lfs() {
	D_frD; D_rA; D_d16;

	//SET_FPR_SINGLE(MAKE(float, m.rw(rA_0 + d)));
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//eax = swapw([eax]);
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	//PS0(frD) = PS1(frD) = MAKE(float, eax);
	//fld eax //not possible; we must go through memory
	//mov [esp - 4], eax
	AB(0x89);
	AB(0x44);
	AB(0x24);
	AB(-4);
	//fld m32real [esp - 4]
	AB(0xD9);
	AB(0x44);
	AB(0x24);
	AB(-4);

	SET_FPR_SINGLE;
}

// Recompiles PowerPC `fmuls`: frD = frA * frC as a single-precision result,
// updating FPSCR[FPRF] through add_set_fpscr_fprf().
// Throws bad_form_exception(Unemulated) when Rc is set.
// NOTE(review): SET_DISP8, FLD_PS_D_DISP8, FMUL_PS_D_DISP8 and
// SET_FPR_SINGLE_DISP8 are macros defined elsewhere; from their names they
// presumably emit ebx-relative x87 load/multiply sequences and store the
// result to frD as a single - confirm.
void Recompiler::_fmuls() {
	D_frD; D_frA; D_frC; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(FPR_PS_D(frA) * FPR_PS_D(frC));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	// FPRF is derived while the result is still on the x87 stack.
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

// Recompiles PowerPC `fadds`: frD = frA + frB as a single-precision result,
// updating FPSCR[FPRF] through add_set_fpscr_fprf().
// Throws bad_form_exception(Unemulated) when Rc is set.
// NOTE(review): SET_DISP8, FLD_PS_D_DISP8, FADD_PS_D_DISP8 and
// SET_FPR_SINGLE_DISP8 are macros defined elsewhere; from their names they
// presumably emit ebx-relative x87 load/add sequences and store the result
// to frD as a single - confirm.
void Recompiler::_fadds() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(FPR_PS_D(frA) + FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FADD_PS_D_DISP8(frB);
	// FPRF is derived while the result is still on the x87 stack.
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

// Recompiles PowerPC `fctiwz`: converts frB to a 32-bit integer, rounding
// toward zero, and stores it in the low word of frD. The high word of frD is
// set to UNDEFINED_PPCWORD.
//
// The x87 control word is saved twice below the stack pointer: the copy at
// [esp - 2] is kept for restoring, the copy at [esp - 4] gets its RC field
// forced to 11b (round toward zero) and is loaded for the conversion.
// Throws bad_form_exception(Unemulated) when Rc is set.
// NOTE(review): SET_DISP8 / ADD_DISP8 / FLD_PS_D_DISP8 are defined elsewhere;
// the ModRM bytes used below (0x5B, 0x43) address [ebx + disp8], so
// SET_DISP8 presumably loads the base into ebx - confirm.
void Recompiler::_fctiwz() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//M$VC usually uses round to zero, if I've understood things correctly.
	//Now it seems I haven't.
	SET_DISP8;

	//fnstcw WORD [esp - 2]  //for restore
	AB(0xD9);
	AB(0x7C);
	AB(0x24);
	AB(-2);
	//fnstcw WORD [esp - 4]
	AB(0xD9);
	AB(0x7C);
	AB(0x24);
	AB(-4);
	//or BYTE [esp - 3], 0x0C //set RC to 11b	//[esp - 3] is the high byte of the copy at [esp - 4]
	AB(0x80);
	AB(0x4C);
	AB(0x24);
	AB(-3);
	AB(0x0C);
	//fldcw WORD [esp - 4]
	AB(0xD9);
	AB(0x6C);
	AB(0x24);
	AB(-4);

	//r.fpr[frD].loword = (int)FPR_PS_D(frB);
	FLD_PS_D_DISP8(frB);
	//fistp r.fpr[frD].loword
	AB(0xDB);
	AB(0x5B);
	ADD_DISP8(&r.fpr[frD].loword);

	//we don't need to restore the old mode (?)
	//we shouldn't need to, but things are broken for no reason, so we'll try
	//fldcw WORD [esp - 2]
	AB(0xD9);
	AB(0x6C);
	AB(0x24);
	AB(-2);

	//r.fpr[frD].hiword = UNDEFINED_PPCWORD;
	//mov r.fpr[frD].hiword, UNDEFINED_PPCWORD
	AB(0xC7);
	AB(0x43);
	ADD_DISP8(&r.fpr[frD].hiword);
	AD(UNDEFINED_PPCWORD);
}

// Recompiles PowerPC `stfd`: m.wd(rA_0 + d, MAKE(QWORD, FPR_PS_D(frS))) -
// store a 64-bit floating-point value.
//
// The value is pushed onto the x87 stack first (FLD_PS_D), before the
// address is computed. It is then popped either into an 8-byte stack slot
// that becomes the QWORD argument of Hardware::wd (path skipped when
// rA == 0), or directly into the target under "no_hardware", where the two
// 32-bit halves are byte-swapped and exchanged in place.
// NOTE(review): add_mem1_rA0d()/add_mem2() and FLD_PS_D are defined
// elsewhere; this ordering assumes the address helper leaves the x87 stack
// alone - the original author flagged its stability as uncertain.
void Recompiler::_stfd() {
	D_frS; D_rA; D_d16;

	//m.wd(rA_0 + d, MAKE(QWORD, FPR_PS_D(frS)));
	FLD_PS_D(frS);  //It's a performance gain to move this here, but I'm not sure how stable it is.

	add_mem1_rA0d(rA, d, true);
	if(rA != 0) {
		//push (double)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(8);	//sub esp, 8
		AB(0xDD); AB(0x1C); AB(0x24); //fstp qword [esp]
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wd	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, QWORD) = &Hardware::wd;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		// Write the host-order double to the target, then fix it up in place.
		AB(0xDD); AB(0x18);	//fstp qword [eax]

		//mov ebx, [eax]
		AB(0x8B);
		AB(0x18);
		//mov ecx, [eax + 4]
		AB(0x8B);
		AB(0x48);
		AB(4);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//bswap ecx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC9);
		// The halves are written back crosswise, completing the 64-bit swap.
		//mov [eax + 4], ebx
		AB(0x89);
		AB(0x58);
		AB(4);
		//mov [eax], ecx
		AB(0x89);
		AB(0x08);
	}
	add_label("end");
}

// Recompiles PowerPC `fsubs`: frD = frA - frB as a single-precision result,
// updating FPSCR[FPRF] through add_set_fpscr_fprf().
// Throws bad_form_exception(Unemulated) when Rc is set.
// NOTE(review): SET_DISP8, FLD_PS_D_DISP8, FSUB_PS_D_DISP8 and
// SET_FPR_SINGLE_DISP8 are macros defined elsewhere; from their names they
// presumably emit ebx-relative x87 load/subtract sequences and store the
// result to frD as a single - confirm.
void Recompiler::_fsubs() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(FPR_PS_D(frA) - FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FSUB_PS_D_DISP8(frB);
	// FPRF is derived while the result is still on the x87 stack.
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

// Recompiles PowerPC `mfcr`: r.gpr[rD] = r.cr.
void Recompiler::_mfcr() {
	D_rD;

	ADD_BYTE(0xA1); ADD_DWORD(&r.cr);	//mov eax, r.cr
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
}

// Recompiles PowerPC `cmpl`: unsigned compare of r.gpr[rA] with r.gpr[rB],
// with the result placed in CR field crfD.
// Throws bad_form_exception(Invalid) when the L bit is set.
void Recompiler::_cmpl() {
	D_crfD;
	D_L;
	D_rA;
	D_rB;

	if(L) {
		throw bad_form_exception(bad_form_exception::Invalid);
	}

	AB(0xA1); AD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
	AB(0x39); AB(0x05); AD(&r.gpr[rA]);	//cmp r.gpr[rA], eax

	// Turn the host flags into a CR nibble in al, then write it to crfD.
	add_makecr2al_unsigned();
	add_setal2cr(crfD);
}

// Recompiles PowerPC `xor`: r.gpr[rA] = r.gpr[rS] ^ r.gpr[rB].
// With Rc set, add_setcr0() is emitted after the operation.
void Recompiler::_xor() {
	D_rS; D_rA; D_rB; D_Rc;

	const bool dest_is_operand = (rA == rS) || (rA == rB);
	if(!dest_is_operand) {
		// Three distinct registers: compute in eax and store.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
		ADD_BYTE(0x33); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//xor eax, r.gpr[rB]
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax
	} else {
		// The destination is one of the operands: load the other operand and
		// XOR it into the destination in place.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA == rS ? rB : rS]);	//mov eax, <other operand>
		AB(0x31); AB(0x05); AD(&r.gpr[rA]);	//xor r.gpr[rA], eax
	}

	if(Rc) {
		add_setcr0();
	}
}

// Recompiles PowerPC `neg`: r.gpr[rD] = -int(r.gpr[rA]).
// With OE set add_setoverflow() is emitted, with Rc set add_setcr0(), in
// that order.
void Recompiler::_neg() {
	D_rD; D_rA; D_OE; D_Rc;

	if(rD != rA) {
		// Distinct registers: negate a copy in eax and store it.
		ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
		ADD_BYTE(0xF7); ADD_BYTE(0xD8);	//neg eax
		ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
	} else {
		// Same register: negate it in place.
		AB(0xF7); AB(0x1D); AD(&r.gpr[rD]);	//neg r.gpr[rD]
	}

	if(OE) {
		add_setoverflow();
	}
	if(Rc) {
		add_setcr0();
	}
}

void Recompiler::_mfmsr() {
	D_rD;
	//r.gpr[rD] = r.getmsr();
	//eax = r.getmsr();
	//push edi
	AB(0x57);
	//mov ecx, &r //the this pointer
	AB(0xB9);
	AD(&r);
	//call r.getmsr  //thiscall assumed
	AB(0xE8);
	DWORD (Registers::*temp)() const = &Registers::getmsr;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);

	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

void Recompiler::_mtmsr() {
	D_rS;
	//r.setmsr(r.gpr[rS]);
	//mov eax, r.gpr[rS]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rS]);

	//r.setmsr(eax)
	//push edi
	AB(0x57);
	//push eax
	AB(0x50);
	//mov ecx, &r //the this pointer
	AB(0xB9);
	AD(&r);
	//call r.setmsr  //thiscall assumed
	AB(0xE8);
	void (Registers::*temp)(DWORD) = &Registers::setmsr;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);
}

// Recompiles PowerPC `fmr`: r.fpr[frD] = r.fpr[frB], copied as two 32-bit
// words through eax (low word first, then high word).
// Throws bad_form_exception(Unemulated) when Rc is set.
// This can probably be optimized, pipelining-wise.
void Recompiler::_fmr() {
	D_frD;
	D_frB;
	D_Rc;

	if(Rc) {
		throw bad_form_exception(bad_form_exception::Unemulated);
	}

	SET_DISP8;

	// Low word.
	AB(0x8B); AB(0x43); ADD_DISP8(&r.fpr[frB].loword);	//mov eax, r.fpr[frB].loword
	AB(0x89); AB(0x43); ADD_DISP8(&r.fpr[frD].loword);	//mov r.fpr[frD].loword, eax

	// High word.
	AB(0x8B); AB(0x43); ADD_DISP8(&r.fpr[frB].hiword);	//mov eax, r.fpr[frB].hiword
	AB(0x89); AB(0x43); ADD_DISP8(&r.fpr[frD].hiword);	//mov r.fpr[frD].hiword, eax
}

// Recompiles PowerPC `mtfsf`: copies the 4-bit fields of r.fpr[frB].loword
// selected by the 8-bit field mask FM into r.fpscr.
//
// FPSCR_FEX and FPSCR_VX are never copied from the source in either branch;
// add_update_fpscr_fex_vx() is emitted at the end. With FM == 0xFF the whole
// word (minus those two bits) is written at once; otherwise a bit mask is
// built at recompile time, with one nibble per FM bit that is set.
// Throws bad_form_exception(Unemulated) when Rc is set.
void Recompiler::_mtfsf() {
	D_FM; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	if(FM == 0xFF) {  //Optimization
		//mov eax, r.fpr[frB].loword
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.fpr[frB].loword);
		//and eax, ~(FPSCR_FEX | FPSCR_VX)
		AB(0x25);
		AD(~(FPSCR_FEX | FPSCR_VX));
		//mov r.fpscr, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpscr);
	} else {
		// FM bit i (tested at position 24+i) selects FPSCR bits i*4 .. i*4+3
		// in the numbering used by makemaskw().
		DWORD bitmask=0;
		for(int i=0; i<8; i++) if(getbit(FM, 24+i)) {
			bitmask |= makemaskw(i*4, i*4+3);
		}
		bitmask &= ~(FPSCR_FEX | FPSCR_VX);
		{
			// The trailing numbers in the comments below are the encoded
			// length of each instruction in bytes.
			//setflags(r.fpscr, bitmask, false);
			//and r.fpscr, ~bitmask	10
			AB(0x81);
			AB(0x25);
			AD(&r.fpscr);
			AD(~bitmask);
			//setflags(r.fpscr, bitmask & r.fpr[frB].loword, true);
			//mov eax, r.fpr[frB].loword	5
			AB(0xA1);
			AD(&r.fpr[frB].loword);
			//and eax, bitmask  5
			AB(0x25);
			AD(bitmask);
			//or r.fpscr, eax	6
			AB(0x09);
			AB(0x05);
			AD(&r.fpscr);
		}
		/*{ //Different version, perhaps optimized.
		//mov ebx, ~bitmask	5
		//mov eax, r.fpscr	5
		//and eax, ebx		2
		//not ebx			2
		//and ebx, r.fpr[frB].loword  2
		//or eax, ebx		2
		//mov r.fpscr, eax	5
		}*/
	}
	add_update_fpscr_fex_vx();
}

//Recompiles mtfsb1: sets bit crbD of r.fpscr. crbD uses PowerPC bit numbering
//(bit 0 is the most significant), hence the x86 bit index 31 - crbD.
//The record form (Rc) and crbD values 1 and 2 are rejected as unemulated.
//NOTE(review): unlike _mtfsf, no add_update_fpscr_fex_vx() is emitted here -
//confirm whether FEX/VX need refreshing after an exception bit is set.
void Recompiler::_mtfsb1() {
	D_crbD; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);
	if(crbD == 1 || crbD == 2)
		throw bad_form_exception(bad_form_exception::Unemulated); //invalid?

	//setflags(r.fpscr, makeflag(crbD), true);
	//bts r.fpscr, 31 - crbD
	AB(0x0F);
	AB(0xBA);
	AB(0x2D);
	AD(&r.fpscr);
	AB(31 - crbD);
}

//Recompiles andc: r.gpr[rA] = r.gpr[rS] & ~r.gpr[rB]. With Rc, add_setcr0()
//is emitted after the final `and`, which is the last flag-setting instruction
//on both paths (`not` and `mov` leave the x86 flags untouched).
void Recompiler::_andc() {
	D_rS; D_rA; D_rB; D_Rc;

	//r.gpr[rA] = r.gpr[rS] & ~r.gpr[rB];
	if(rA == rS) {  //Optimization: and directly into the destination register
		//mov eax, r.gpr[rB]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rB]);
		//not eax
		AB(0xF7);
		AB(0xD0);
		//and r.gpr[rA], eax
		AB(0x21);
		AB(0x05);
		AD(&r.gpr[rA]);
	} else {
		//mov eax, r.gpr[rB]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rB]);
		//not eax
		AB(0xF7);
		AB(0xD0);
		//and eax, r.gpr[rS]
		ADD_BYTE(0x23);
		ADD_BYTE(0x05);
		ADD_DWORD(&r.gpr[rS]);
		//mov r.gpr[rA], eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.gpr[rA]);
	}

	if(Rc)
		add_setcr0();
}

//Recompiles cntlzw: r.gpr[rA] = number of leading zero bits in r.gpr[rS],
//or 32 when the source is zero. With Rc, add_setcr0() is emitted after the
//store; both paths leave the x86 flags describing the result at that point.
void Recompiler::_cntlzw() {
	D_rS; D_rA; D_Rc;

	/*DWORD n=0;
	for(; n<32; n++)
	if(getbit(r.gpr[rS], n))
	break;*/
	//bsr eax, r.gpr[rS]  //index of the highest set bit; ZF=1 if the source is zero
	AB(0x0F);
	AB(0xBD);
	AB(0x05);
	AD(&r.gpr[rS]);
	//jz zero
	AB(0x74);
	request_label("zero");
	{ //not_zero:
		//eax = 31 - eax;  //the `add` also leaves the flags describing the result
		//neg eax
		AB(0xF7);
		AB(0xD8);
		//add eax, 31
		AB(0x83);
		AB(0xC0);
		AB(31);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("zero");
		//mov eax, 32
		AB(0xB8);
		AD(32);
		if(Rc) {
			//test eax, eax  //mov does not set flags; needed before add_setcr0
			AB(0x85);
			AB(0xC0);
		}
	}
	add_label("store");
	//r.gpr[rA] = n;
	//mov r.gpr[rA], eax
	AB(0xA3);
	AD(&r.gpr[rA]);

	if(Rc)
		add_setcr0();
}

//Recompiles lbzx: r.gpr[rD] = zero-extended byte at (rA_0 + r.gpr[rB]).
//Two paths are emitted: a call to Hardware::rb and a direct host-memory read
//at the "no_hardware" label; both leave the value in eax and meet at "end".
//NOTE(review): add_mem1_x presumably leaves the address in eax and branches
//to "no_hardware" for ordinary memory, and add_mem2 presumably jumps to
//"end" - confirm against their definitions.
void Recompiler::_lbzx() {
	D_rD; D_rA; D_rB;

	//r.gpr[rD] = m.rb(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rb	//thiscall assumed
		AB(0xE8);
		BYTE (Hardware::*temp)(WORD) = &Hardware::rb;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, al //assumed that the high bytes is zeroed. NOT SO.
		AB(0x0F);
		AB(0xB6);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, byte [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB6);
		ADD_BYTE(0x00);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

//Recompiles addic. (record form): r.gpr[rD] = r.gpr[rA] + SIMM, CR0 is set
//through add_setcr0(), and XER[CA] receives the x86 carry of the addition.
//The carry branch is emitted after add_setcr0(), so that helper is relied on
//to leave CF intact.
void Recompiler::_addic_() {
	D_rD; D_rA; D_SIMM;

	//r.gpr[rD] = rA + SIMM;
	//setflags(r.xer, XER_CA, carry(r.gpr[rA], SIMM));
	if(rD != rA) {
		//mov eax, r.gpr[rA]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rA]);
		//add eax, SIMM
		ADD_BYTE(0x05);
		ADD_DWORD(SIMM);
		//mov r.gpr[rD], eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.gpr[rD]);
	} else {
		//Optimization: add straight into the register slot.
		//add r.gpr[rD], SIMM
		AB(0x81);
		AB(0x05);
		AD(&r.gpr[rD]);
		AD(SIMM);
	}
	//Common to both forms: record CR0 from the flags of the add.
	add_setcr0();

	//jnc no_carry
	AB(0x73);
	request_label("no_carry");

	//carry: bts r.xer, 31-2
	AB(0x0F);
	AB(0xBA);
	AB(0x2D);
	AD(&r.xer);
	AB(31-2);
	//jmp end
	AB(0xEB);
	request_label("end");

	add_label("no_carry");
	//btr r.xer, 31-2
	AB(0x0F);
	AB(0xBA);
	AB(0x35);
	AD(&r.xer);
	AB(31-2);

	add_label("end");
}

//Recompiles rlwimi: rotates r.gpr[rS] left by SH and inserts the bits selected
//by makemaskw(MB, ME) into r.gpr[rA], keeping the other bits of rA.
//eax holds the rotated/masked source, ebx the preserved part of rA.
//With Rc, add_setcr0() follows; the final `or` is the last flag-setting
//instruction before it.
void Recompiler::_rlwimi() {
	D_rS; D_rA; D_SH; D_MB; D_ME; D_Rc;

	//r.gpr[rA] = (_rotl(r.gpr[rS], SH) & makemaskw(MB, ME)) | (r.gpr[rA] & ~makemaskw(MB, ME));
	//mov eax, r.gpr[rS]
	AB(0xA1);
	AD(&r.gpr[rS]);
	//rol eax, SH
	AB(0xC1);
	AB(0xC0);
	AB((BYTE)SH);
	//and eax, makemaskw(MB, ME)
	AB(0x25);
	AD(makemaskw(MB, ME));
	//mov ebx, ~makemaskw(MB, ME)
	AB(0xBB);
	AD(~makemaskw(MB, ME));
	//and ebx, r.gpr[rA]
	AB(0x23);
	AB(0x1D);
	AD(&r.gpr[rA]);
	//or eax, ebx
	AB(0x0B);
	AB(0xC3);
	//mov r.gpr[rA], eax
	AB(0xA3);
	AD(&r.gpr[rA]);

	if(Rc)
		add_setcr0();
}

//Recompiles andis.: r.gpr[rA] = r.gpr[rS] & (UIMM << 16).
//add_setcr0() is emitted unconditionally; this handler decodes no Rc field.
void Recompiler::_andis() {
	D_rS; D_rA; D_UIMM;

	//r.gpr[rA] = r.gpr[rS] & (DWORD(UIMM) << 16);
	if(rA == rS) {  //Optimization: and directly into the register slot
		//and r.gpr[rA], (DWORD(UIMM) << 16)
		AB(0x81);
		AB(0x25);
		AD(&r.gpr[rA]);
		AD((DWORD(UIMM) << 16));
	} else {
		//mov eax, r.gpr[rS]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rS]);
		//and eax, (DWORD(UIMM) << 16)
		ADD_BYTE(0x25);
		ADD_DWORD((DWORD(UIMM) << 16));
		//mov r.gpr[rA], eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.gpr[rA]);
	}
	add_setcr0();
}

//Recompiles addic: r.gpr[rD] = r.gpr[rA] + SIMM, and XER[CA] receives the
//x86 carry flag of that addition (bit 2 in PowerPC numbering, i.e. x86 bit 29).
//NOTE(review): the jnc/bts/btr tail looks equivalent in effect to the
//add_setxerca_by_cf() helper used by other handlers - confirm before unifying.
void Recompiler::_addic() {
	D_rD; D_rA; D_SIMM;

	//r.gpr[rD] = rA + SIMM;
	//setflags(r.xer, XER_CA, carry(r.gpr[rA], SIMM));
	if(rD == rA) {  //Optimization: add directly into the register slot
		//add r.gpr[rD], SIMM 
		AB(0x81);
		AB(0x05);
		AD(&r.gpr[rD]);
		AD(SIMM);
	} else {
		//mov eax, r.gpr[rA]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rA]);
		//add eax, SIMM
		ADD_BYTE(0x05);
		ADD_DWORD(SIMM);
		//mov r.gpr[rD], eax  //mov leaves CF from the add untouched
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.gpr[rD]);
	}
	//jnc no_carry
	AB(0x73);
	request_label("no_carry");
	{ //carry:
		//bts r.xer, 31-2
		AB(0x0F);
		AB(0xBA);
		AB(0x2D);
		AD(&r.xer);
		AB(31-2);
		//jmp end
		AB(0xEB);
		request_label("end");
	}
	{
		add_label("no_carry");
		//btr r.xer, 31-2
		AB(0x0F);
		AB(0xBA);
		AB(0x35);
		AD(&r.xer);
		AB(31-2);
	}
	add_label("end");
}

void Recompiler::_sc() {
	if(!g::nerf_sc) {
		//interrupt.raise(INTERRUPT_SC);
		//push edi
		AB(0x57);
		//push DWORD(INTERRUPT_SC)
		AB(0x68);
		AD(INTERRUPT_SC);
		//mov ecx, &interrupt //the this pointer
		AB(0xB9);
		AD(&interrupt);
		//call interrupt.raise  //thiscall assumed
		AB(0xE8);
		bool (InterruptRaiser::*temp)(WORD) = &InterruptRaiser::raise;
		REQUEST_CALL(MAKE(DWORD, temp));
		//pop edi
		AB(0x5F);

		//mov eax, eia2pos(nia);
		AB(0xB8);
		AD(eia2pos(nia));
		//return
		AB(0xC3);
	}
}

//Recompiles cror: CR bit crbD = CR bit crbA | CR bit crbB.
//r.cr is loaded into eax, the two source bits are tested with `bt`, the
//destination bit is set or cleared in eax, and eax is written back.
//Bit indices are PowerPC-numbered, hence 31 - n for the x86 bit index.
void Recompiler::_cror() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), getbit(r.cr, crbA) || getbit(r.cr, crbB));
	//mov eax, r.cr
	AB(0xA1);
	AD(&r.cr);
	//bt eax, 31-crbA
	AB(0x0F);
	AB(0xBA);
	AB(0xE0);
	AB((BYTE)31 - crbA);
	//jc set
	AB(0x72);
	request_label("set");
	//bt eax, 31-crbB
	AB(0x0F);
	AB(0xBA);
	AB(0xE0);
	AB((BYTE)31 - crbB);
	//jc set
	AB(0x72);
	request_label("set");
	{ //clear: neither source bit was set
		//btr eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xF0);
		AB((BYTE)31 - crbD);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("set");
		//bts eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xE8);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	//mov r.cr, eax
	AB(0xA3);
	AD(&r.cr);
}

void Recompiler::_twi() {
	D_TO; D_rA; D_SIMM;

	/*if((int(r.gpr[rA]) < SIMM && (TO & 0x10)) ||
	(int(r.gpr[rA]) > SIMM && (TO & 0x08)) ||
	(int(r.gpr[rA]) == SIMM && (TO & 0x04)) ||
	(r.gpr[rA] < SIMM && (TO & 0x02)) ||
	(r.gpr[rA] > SIMM && (TO & 0x01)))
	{
	r.srr1 = makeflag(14);
	interrupt.raise(INTERRUPT_PROGRAM);
	}*/
	if(TO == 0) {
		add_pos_fatal_error(REC_UNEMULATED_INSTRUCTION_FORM);
		SET_IT_ERROR;
		return;
	}
	//cmp r.gpr[rA], SIMM
	AB(0x81);
	AB(0x3D);
	AD(&r.gpr[rA]);
	AD(SIMM);
	if(TO & 0x10) {
		//jl raise
		AB(0x7C);
		request_label("raise");
	}
	if(TO & 0x08) {
		//jg raise
		AB(0x7F);
		request_label("raise");
	}
	if(TO & 0x04) {
		//je raise
		AB(0x74);
		request_label("raise");
	}
	if(TO & 0x02) {
		//jb raise
		AB(0x72);
		request_label("raise");
	}
	if(TO & 0x01) {
		//ja raise
		AB(0x77);
		request_label("raise");
	}
	//jmp end
	AB(0xEB);
	request_label("end");
	{
		add_label("raise");
		//r.srr1 = makeflag(14);
		//mov r.srr1, makeflag(14)
		AB(0xC7);
		AB(0x05);
		AD(&r.srr1);
		AD(makeflag(14));

		//interrupt.raise(INTERRUPT_PROGRAM);
		//push edi
		AB(0x57);
		//push DWORD(INTERRUPT_PROGRAM)
		AB(0x68);
		AD(INTERRUPT_PROGRAM);
		//mov ecx, &interrupt //the this pointer
		AB(0xB9);
		AD(&interrupt);
		//call interrupt.raise  //thiscall assumed
		AB(0xE8);
		bool (InterruptRaiser::*temp)(WORD) = &InterruptRaiser::raise;
		REQUEST_CALL(MAKE(DWORD, temp));
		//pop edi
		AB(0x5F);

		//mov eax, eia2pos(cia)
		AB(0xB8);
		AD(eia2pos(cia));
		//return
		AB(0xC3);
	}
	add_label("end");
}

//Recompiles bcctr. A taken branch is not translated: the generated code loads
//cia into eax and reports REC_BCCTR through add_fatal_error(). For every BO
//other than 0x14 a condition test is emitted first (add_bc_cond followed by a
//compare of al against 0x02) which skips that report when the branch is not
//taken. BO with bit 2 (getbitr) clear is an invalid form.
void Recompiler::_bcctr() {
	D_BO; D_BI; //D_LK; //handled by error handler
	if(!getbitr(BO, 2))
		throw bad_form_exception(bad_form_exception::Invalid);

	const bool always_taken = (BO == 0x14);

	if(!always_taken) {
		add_bc_cond(BO, BI);

		//if(cond_ok) {
		//cmp al, 0x02
		ADD_BYTE(0x3C);
		ADD_BYTE(0x02);
		//jnz nojump
		ADD_BYTE(0x75);
		request_label("nojump");
	}

	//mov eax, cia
	AB(0xB8);
	AD(cia);
	add_fatal_error(REC_BCCTR);
	add_label("nojump");

	if(always_taken) {
		last_instruction_type = IT_ERROR;
	} else {
		last_instruction_type = IT_BRANCH_CONDITIONAL;
	}
}

//Recompiles lhzx: r.gpr[rD] = zero-extended halfword at (rA_0 + r.gpr[rB]).
//Hardware path: Hardware::rh is called and its 16-bit result zero-extended.
//Direct path ("no_hardware"): the halfword is read from host memory and its
//two bytes swapped (big-endian guest data on a little-endian host).
//NOTE(review): add_mem1_x presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_lhzx() {
	D_rD; D_rA; D_rB;

	//r.gpr[rD] = m.rh(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rh	//thiscall assumed
		AB(0xE8);
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, ax //assumed that the high bytes is zeroed. NOT SO.
		AB(0x0F);
		AB(0xB7);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, word [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB7);
		ADD_BYTE(0x00);
		//xchg al, ah  //byte-swap the halfword
		ADD_BYTE(0x86);
		ADD_BYTE(0xC4);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

//Recompiles stbx: stores the low byte of r.gpr[rS] at (rA_0 + r.gpr[rB]).
//Hardware path: Hardware::wb(address, value) is called with the whole
//register pushed as the value argument. Direct path ("no_hardware"): the byte
//is written through bl to the host address in eax.
//NOTE(review): add_mem1_x presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_stbx() {
	D_rS; D_rA; D_rB;

	//m.wb(rA_0 + r.gpr[rB], (BYTE)r.gpr[rS]);
	add_mem1_x(rA, rB, true);
	{
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wb	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, BYTE) = &Hardware::wb;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov bl, r.gpr[rS] //this should load the least-significant byte
		ADD_BYTE(0x8A);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//mov [eax], bl
		ADD_BYTE(0x88);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

//Recompiles lha: r.gpr[rD] = sign-extended halfword at (rA_0 + d).
//Both paths leave the 16-bit value in ax; the sign extension (movsx) is
//emitted once, after the "end" label. The hardware path is only emitted when
//rA != 0.
//NOTE(review): add_mem1_rA0d presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_lha() {
	D_rD; D_rA; D_d16;

	//r.gpr[rD] = (int)(short)m.rh(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rh	//thiscall assumed
		AB(0xE8);
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ax, [eax]  //0x66 is the operand-size prefix
		ADD_BYTE(0x66);
		AB(0x8B);
		ADD_BYTE(0x00);
		//xchg al, ah  //byte-swap the halfword
		ADD_BYTE(0x86);
		ADD_BYTE(0xC4);
	}
	add_label("end");
	//movsx eax, ax
	AB(0x0F);
	AB(0xBF);
	AB(0xC0);
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

//Recompiles subfe: r.gpr[rD] = ~r.gpr[rA] + r.gpr[rB] + XER[CA].
//XER[CA] is copied into the x86 carry flag with `bt`, then consumed by `adc`.
//Optional overflow (OE) and CR0 (Rc) updates are emitted before
//add_setxerca_by_cf(), so those helpers are relied on to keep CF intact.
void Recompiler::_subfe() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	//r.gpr[rD] = ~r.gpr[rA] + r.gpr[rB] + xerca;
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//not eax  //does not modify flags
	AB(0xF7);
	AB(0xD0);
	//bt r.xer, 31-2  //is bit 2 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31-2);
	//adc eax, r.gpr[rB]	//using CF from bt
	ADD_BYTE(0x13);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Recompiles mulhwu: r.gpr[rD] = upper 32 bits of the unsigned 64-bit product
//r.gpr[rA] * r.gpr[rB]. x86 `mul` leaves the product in edx:eax, so edx is
//stored. With Rc, an explicit `test edx, edx` precedes add_setcr0().
void Recompiler::_mulhwu() {
	D_rD; D_rA; D_rB; D_Rc;

	//r.gpr[rD] = PPC_HIWORD((QWORD(r.gpr[rA]) * QWORD(r.gpr[rB]));
	//mov eax, r.gpr[rA]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rA]);
	//mul r.gpr[rB]
	AB(0xF7);
	AB(0x25);
	AD(&r.gpr[rB]);
	//mov r.gpr[rD], edx
	AB(0x89);
	AB(0x15);
	AD(&r.gpr[rD]);

	if(Rc) {
		//test edx, edx  //make the flags describe the stored high word
		AB(0x85);
		AB(0xD2);

		add_setcr0();
	}
}

//Recompiles adde: r.gpr[rD] = r.gpr[rA] + r.gpr[rB] + XER[CA].
//XER[CA] is copied into the x86 carry flag with `bt`, then consumed by `adc`.
//Optional overflow (OE) and CR0 (Rc) updates are emitted before
//add_setxerca_by_cf(), so those helpers are relied on to keep CF intact.
void Recompiler::_adde() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	//r.gpr[rD] = r.gpr[rB] + r.gpr[rA] + xerca;
	//mov eax, r.gpr[rB]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rB]);
	//bt r.xer, 31-2  //is bit 2 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31-2);
	//adc eax, r.gpr[rA]	//using CF from bt
	ADD_BYTE(0x13);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rA]);
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Recompiles mcrf: copies CR field crfS into CR field crfD.
//The source field is moved into the high nibble of al, the low nibble is
//cleared, and add_setal2cr(crfD) is emitted to store it.
void Recompiler::_mcrf() {
	D_crfD; D_crfS;

	//r.setcr(crfD, getbitsw(r.cr, crfS*4, crfS*4+3));
	//mov eax, r.cr
	AB(0xA1);
	AD(&r.cr);

	//The field must end up in bits 4..7 of al (the high nibble, not the low).
	if(crfS != 7) {
		//shr eax, 4 * (6 - crfS)
		AB(0xC1);
		AB(0xE8);
		AB(4 * (6 - crfS));
	} else {
		//Field 7 sits in the low nibble already, so it moves up instead.
		//shl eax, 4
		AB(0xC1);
		AB(0xE0);
		AB(4);
	}

	//and al, 0xF0
	AB(0x24);
	AB(0xF0);

	add_setal2cr(crfD);
}

void Recompiler::_mtcrf() {
	D_rS; D_CRM;

	if(CRM == 0xFF) {  //Optimization
		//mov eax, r.gpr[rS]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rS]);
		//mov r.cr, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.cr);
	} else {
		DWORD bitmask=0;
		for(int i=0; i<8; i++) if(getbit(CRM, 24+i)) {
			bitmask |= makemaskw(i*4, i*4+3);
		}
		{
			//setflags(r.cr, bitmask, false);
			//and r.cr, ~bitmask
			AB(0x81);
			AB(0x25);
			AD(&r.cr);
			AD(~bitmask);
			//setflags(r.cr, bitmask & r.gpr[rS], true);
			//mov eax, bitmask
			AB(0xB8);
			AD(bitmask);
			//and eax, r.gpr[rS]
			AB(0x23);
			AB(0x05);
			AD(&r.gpr[rS]);
			//or r.cr, eax
			AB(0x09);
			AB(0x05);
			AD(&r.cr);
		}
	}
}

//Recompiles fcmpu: compares frA with frB on the x87 stack and encodes the
//outcome in the high nibble of al: 0x80 less, 0x40 greater, 0x20 equal,
//0x10 unordered. add_set_fpscr_fpcc_by_al() and add_setal2cr(crfD) are then
//emitted to store it.
void Recompiler::_fcmpu() {
	D_crfD; D_frA; D_frB;

	FLD_PS_D(frB);  //load into ST(1)
	FLD_PS_D(frA);  //load into ST(0)
	//fucompp //Compare ST(0) with ST(1) and pop register stack twice
	AB(0xDA);
	AB(0xE9);
	//fnstsw ax
	AB(0xDF);
	AB(0xE0);
	//sahf  (C3, C2, C0) => ZF, PF, CF
	AB(0x9E);
	//jp unordered  //must be tested first: unordered sets C3, C2 and C0 together
	AB(0x7A);
	request_label("unordered");
	//jc less
	AB(0x72);
	request_label("less");
	//jz equal
	AB(0x74);
	request_label("equal");
	{ //greater:
		//mov al, 0x40
		AB(0xB0);
		AB(0x40);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("unordered");
		//mov al, 0x10
		AB(0xB0);
		AB(0x10);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("less");
		//mov al, 0x80
		AB(0xB0);
		AB(0x80);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("equal");
		//mov al, 0x20
		AB(0xB0);
		AB(0x20);
	}

	add_label("store");
	add_set_fpscr_fpcc_by_al();
	add_setal2cr(crfD);
}

//Recompiles stfs: stores frS, converted to single precision, at (rA_0 + d).
//The value is loaded onto the x87 stack first and popped by `fstp dword` on
//whichever path runs. Hardware path (emitted only when rA != 0): the float is
//spilled to the stack as the value argument of Hardware::ww. Direct path
//("no_hardware"): the float is written to host memory and then byte-swapped
//in place through ebx.
//NOTE(review): add_mem1_rA0d presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_stfs() {
	D_frS; D_rA; D_d16;

	//m.ww(rA_0 + d, MAKE(DWORD, GET_FPR_SINGLE(frS)));
	FLD_PS_D(frS);

	add_mem1_rA0d(rA, d, true);
	if(rA != 0) {
		//push (float)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(4);	//sub esp, 4
		AB(0xD9); AB(0x1C); AB(0x24); //fstp dword [esp]
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		AB(0xD9); AB(0x18);	//fstp dword [eax]

		//mov ebx, [eax]
		AB(0x8B);
		AB(0x18);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		AB(0x89);
		AB(0x18);
	}
	add_label("end");
}

//Recompiles mullw: r.gpr[rD] = low 32 bits of the signed product
//r.gpr[rA] * r.gpr[rB]. With OE, add_setoverflow() is emitted right after the
//store. With Rc, an explicit `test` on the product precedes add_setcr0();
//when both are requested the product is first copied to edx because
//add_setoverflow() trashes eax.
void Recompiler::_mullw() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	//r.gpr[rD] = DWORD((int)r.gpr[rA] * (int)r.gpr[rB]);
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//imul r.gpr[rB]
	AB(0xF7);
	AB(0x2D);
	AD(&r.gpr[rB]);
	//mov r.gpr[rD], eax
	AB(0xA3);
	AD(&r.gpr[rD]);

	//The product only needs saving when the overflow code runs AND CR0 is wanted.
	const bool product_in_edx = OE && Rc;

	if(product_in_edx) {
		//mov edx, eax
		AB(0x89);
		AB(0xC2);
	}
	if(OE) {
		add_setoverflow();	//trashes eax that we need
	}
	if(Rc) {
		if(product_in_edx) {
			//test edx, edx
			AB(0x85);
			AB(0xD2);
		} else {
			//test eax, eax
			AB(0x85);
			AB(0xC0);
		}

		add_setcr0();
	}
}

//Recompiles slw: r.gpr[rA] = r.gpr[rS] << (r.gpr[rB] & 0x1F), or 0 when bit 26
//(PowerPC numbering, x86 bit 5) of r.gpr[rB] is set.
//With Rc, an explicit `test eax, eax` precedes add_setcr0() because not every
//path ends in a flag-setting instruction.
void Recompiler::_slw() {
	D_rS; D_rA; D_rB; D_Rc;

	//r.gpr[rA] = getbit(r.gpr[rB], 26) ? 0 : r.gpr[rS] << (r.gpr[rB] & 0x0000001F);
	//mov ecx, r.gpr[rB]
	AB(0x8B);
	AB(0x0D);
	AD(&r.gpr[rB]);
	//bt ecx, 31-26  //31-26=5	//sixth bit, as it should be
	AB(0x0F);
	AB(0xBA);
	AB(0xE1);
	AB(31-26);
	//jnc standard
	AB(0x73);
	request_label("standard");
	{ //special: shift amount of 32 or more, the result is zero
		//xor eax, eax
		AB(0x31);
		AB(0xC0);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("standard");
		//mov eax, r.gpr[rS]
		AB(0xA1);
		AD(&r.gpr[rS]);
		//and ecx, SignExtend(0x3F)	//and ecx, 0x0000003F
		//bit 5 is known to be clear on this path, so this equals (rB & 0x1F)
		AB(0x83);
		AB(0xE1);
		AB(0x3F);
		//jz store  //zero shift count: the value is stored unshifted
		AB(0x74);
		request_label("store");
		//shl eax, cl //If the count (CL) is 0, the flags are not affected.	//fixed
		AB(0xD3);
		AB(0xE0);
	}
	add_label("store");
	//mov r.gpr[rA], eax
	AB(0xA3);
	AD(&r.gpr[rA]);

	if(Rc) {
		//test eax, eax
		AB(0x85);
		AB(0xC0);
		add_setcr0();
	}
}

//Recompiles srw: r.gpr[rA] = r.gpr[rS] >> (r.gpr[rB] & 0x1F) (logical shift),
//or 0 when bit 26 (PowerPC numbering, x86 bit 5) of r.gpr[rB] is set.
//With Rc, an explicit `test eax, eax` precedes add_setcr0() because not every
//path ends in a flag-setting instruction.
void Recompiler::_srw() {
	D_rS; D_rA; D_rB; D_Rc;

	//r.gpr[rA] = getbit(r.gpr[rB], 26) ? 0 : r.gpr[rS] >> (r.gpr[rB] & 0x0000001F);
	//mov ecx, r.gpr[rB]
	AB(0x8B);
	AB(0x0D);
	AD(&r.gpr[rB]);
	//bt ecx, 31-26  //31-26=5	//sixth bit, as it should be
	AB(0x0F);
	AB(0xBA);
	AB(0xE1);
	AB(31-26);
	//jnc standard
	AB(0x73);
	request_label("standard");
	{ //special: shift amount of 32 or more, the result is zero
		//xor eax, eax
		AB(0x31);
		AB(0xC0);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("standard");
		//mov eax, r.gpr[rS]
		AB(0xA1);
		AD(&r.gpr[rS]);
		//and ecx, SignExtend(0x3F)	//and ecx, 0x0000003F
		//bit 5 is known to be clear on this path, so this equals (rB & 0x1F)
		AB(0x83);
		AB(0xE1);
		AB(0x3F);
		//jz store  //zero shift count: the value is stored unshifted
		AB(0x74);
		request_label("store");
		//shr eax, cl
		AB(0xD3);
		AB(0xE8);
	}
	add_label("store");
	//mov r.gpr[rA], eax
	AB(0xA3);
	AD(&r.gpr[rA]);

	if(Rc) {
		//test eax, eax
		AB(0x85);
		AB(0xC0);
		add_setcr0();
	}
}

//Recompiles subfc: r.gpr[rD] = ~r.gpr[rA] + r.gpr[rB] + 1, with XER[CA] set
//from the x86 carry of that sum. The "+ 1" is supplied by setting CF (stc)
//before the `adc`. Optional overflow (OE) and CR0 (Rc) updates are emitted
//before add_setxerca_by_cf(), so those helpers are relied on to keep CF intact.
void Recompiler::_subfc() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	//r.gpr[rD] = ~r.gpr[rA] + r.gpr[rB] + 1;
	//mov eax, r.gpr[rA]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rA]);
	//not eax
	AB(0xF7);
	AB(0xD0);
	//stc
	AB(0xF9);
	//adc eax, r.gpr[rB]
	ADD_BYTE(0x13);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Recompiles lbzu: r.gpr[rD] = zero-extended byte at (r.gpr[rA] + d), then
//r.gpr[rA] += d. The forms rA == 0 and rA == rD are invalid and rejected.
//Hardware path: Hardware::rb is called and its 8-bit result zero-extended.
//Direct path ("no_hardware"): the byte is read from host memory.
//NOTE(review): add_mem1_rA0d presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_lbzu() {
	D_rD; D_rA; D_d16;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = m.rb(r.gpr[rA] + d);
	add_mem1_rA0d(rA, d, false);
	{	//rA != 0 is guaranteed by the check above, so this path is always emitted
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rb	//thiscall assumed
		AB(0xE8);
		BYTE (Hardware::*temp)(WORD) = &Hardware::rb;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, al //assumed that the high bytes is zeroed. NOT SO.
		AB(0x0F);
		AB(0xB6);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, byte [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB6);
		ADD_BYTE(0x00);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

//Recompiles addme: r.gpr[rD] = r.gpr[rA] + XER[CA] + 0xFFFFFFFF.
//XER[CA] is copied into the x86 carry flag with `bt`, then consumed by
//`adc eax, -1`. Optional overflow (OE) and CR0 (Rc) updates are emitted before
//add_setxerca_by_cf(), so those helpers are relied on to keep CF intact.
void Recompiler::_addme() {
	D_rD; D_rA; D_OE; D_Rc;

	//r.gpr[rD] = r.gpr[rA] + XER_CA + 0xFFFFFFFF;
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//bt r.xer, 31-2  //is bit 2 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31-2);
	//adc eax, -1	//using CF from bt; the imm8 is sign-extended
	AB(0x83);
	AB(0xD0);
	AB(-1);
	//mov r.gpr[rD], eax
	AB(0xA3);
	AD(&r.gpr[rD]);

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Recompiles stbu: stores the low byte of r.gpr[rS] at (r.gpr[rA] + d), then
//r.gpr[rA] += d. The form rA == 0 is invalid and rejected.
//Hardware path: Hardware::wb(address, value) is called. Direct path
//("no_hardware"): the byte is written through bl to the host address in eax.
//NOTE(review): add_mem1_rA0d presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_stbu() {
	D_rS; D_rA; D_d16;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.wb(r.gpr[rA] + d, (BYTE)r.gpr[rS]);
	add_mem1_rA0d(rA, d, true);
	{
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wb	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, BYTE) = &Hardware::wb;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov bl, r.gpr[rS] //this should load the least-significant byte
		ADD_BYTE(0x8A);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//mov [eax], bl
		ADD_BYTE(0x88);
		ADD_BYTE(0x18);
	}
	add_label("end");

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

//Recompiles fneg: frD = -frB, computed on the x87 stack (load, fchs, store).
//The record form (Rc) is not emulated and is rejected at recompile time.
//NOTE(review): FSTPD presumably stores ST(0) to r.fpr[frD] and pops - confirm
//against the macro definition.
void Recompiler::_fneg() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//can be optimized for frD == frB
	//SET_FPRD(-FPR_PS_D(frB));
	FLD_PS_D(frB);
	//fchs
	AB(0xD9);
	AB(0xE0);

	FSTPD;
}

//Recompiles lwzu: r.gpr[rD] = word at (r.gpr[rA] + d), then r.gpr[rA] += d.
//The forms rA == 0 and rA == rD are invalid and rejected.
//Hardware path: Hardware::rw is called. Direct path ("no_hardware"): the word
//is read from host memory and byte-swapped (big-endian guest data).
//NOTE(review): add_mem1_rA0d presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_lwzu() {
	D_rD; D_rA; D_d16;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = m.rw(r.gpr[rA] + d);
	add_mem1_rA0d(rA, d, false);
	{	//rA != 0 is guaranteed by the check above, so this path is always emitted
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

//Recompiles sraw (arithmetic shift right by a register amount), including the
//XER[CA] update. Emitted paths:
// - "special" (bit 26 of r.gpr[rB] set): the result is 0 or 0xFFFFFFFF
//   according to the sign of r.gpr[rS]; XER[CA] comes from add_setxerca_by_nzf().
// - "standard" with a zero source or a zero shift count: XER[CA] is cleared
//   and the source is stored unchanged.
// - "standard" otherwise: XER[CA] is set only for a negative source whose
//   lowest set bit (bsf) lies below the shift count, i.e. when a 1 bit is
//   shifted out; then `sar eax, cl`.
//With Rc, add_setcr0() is emitted after the store on each path.
void Recompiler::_sraw() {
	D_rS; D_rA; D_rB; D_Rc;

	/*DWORD SH = r.gpr[rB] & 0x0000001F;
	DWORD d=0;
	if(getbit(r.gpr[rB], 26)) {
	if(signw(r.gpr[rS]))
	d = makemaskw(0, 31);
	setflags(r.xer, XER_CA, signw(r.gpr[rS]));
	} else {
	d = r.gpr[rS] >> SH;
	if(signw(r.gpr[rS]) && SH != 0)
	d |= makemaskw(0, SH);
	setflags(r.xer, XER_CA, signw(r.gpr[rS]) &&
	getbitsw(r.gpr[rS], 32-SH, 31) != 0 && SH != 0);
	}*/
	//mov ecx, r.gpr[rB]
	AB(0x8B);
	AB(0x0D);
	AD(&r.gpr[rB]);
	//bt ecx, 31-26  //31-26=5	//sixth bit from the right, as it should be
	AB(0x0F);
	AB(0xBA);
	AB(0xE1);
	AB(31-26);
	//jnc standard
	AB(0x73);
	request_label("standard");
	{ //special:
		//xor eax, eax
		AB(0x31);
		AB(0xC0);
		//bt r.gpr[rS], 31-0  //CF = sign bit of the source
		AB(0x0F);
		AB(0xBA);
		AB(0x25);
		AD(&r.gpr[rS]);
		AB(31-0);
		//jnc zeroes
		AB(0x73);
		request_label("zeroes");
		{ //ones:
			//dec eax  //0 -> 0xFFFFFFFF
			AB(0x48);
		}
		{
			add_label("zeroes");
		}
		//mov r.gpr[rA], eax
		AB(0xA3);
		AD(&r.gpr[rA]);

		if(Rc)
			add_setcr0();

		//ZF still reflects the xor/dec above, so "not zero" means a negative source
		add_setxerca_by_nzf();
		//jmp end
		AB(0xEB);
		request_label("end");
	}
	{
		add_label("standard");
		//mov eax, r.gpr[rS]
		AB(0xA1);
		AD(&r.gpr[rS]);
		//bsf ebx, eax  //ebx = index of the lowest set bit; ZF=1 if the source is zero
		AB(0x0F);
		AB(0xBC);
		AB(0xD8);
		//jnz nonzero_source
		AB(0x75);
		request_label("nonzero_source");
		{ //zero_source:
			//also the target of the backward "jz clearxerca_jrc" emitted below
			add_label("clearxerca_jrc");
			//clear XER_CA
			//and byte ptr ((&r.xer) + 3), ~0x20
			AB(0x80);
			AB(0x25);
			AD(DWORD(&r.xer) + 3);
			AB(~0x20);
			if(Rc) {
				//test eax, eax  //the byte `and` above overwrote the flags
				AB(0x85);
				AB(0xC0);
			}
			//jmp store/rc
			AB(0xEB);
			request_label("store/rc");
		}
		{
			add_label("nonzero_source");
			//and ecx, SignExtend(0x1F)	//and ecx, 0x0000001F
			AB(0x83);
			AB(0xE1);
			AB(0x1F);
			//jz clearxerca_jrc
			AB(0x74);
			request_label("clearxerca_jrc");
			{	//nonzero_shift:
				//set_xerca(BSF(src) < SH) && signw(src));  
				//The sign of the source must be checked as well; the bt/jc pair below does that.
				//bt eax, 31-0
				AB(0x0F);
				AB(0xBA);
				AB(0xE0);
				AB(31-0);
				//jc more
				AB(0x72);
				request_label("more");
				{ //less: non-negative source, the carry is always clear
					//clear XER_CA
					//and byte ptr ((&r.xer) + 3), ~0x20
					AB(0x80);
					AB(0x25);
					AD(DWORD(&r.xer) + 3);
					AB(~0x20);
					//jmp shift
					AB(0xEB);
					request_label("shift");
				}
				{
					add_label("more");
					//cmp ebx, ecx  //CF = (BSF(src) < SH), unsigned compare
					AB(0x3B);
					AB(0xD9);

					add_setxerca_by_cf(); 
				}

				add_label("shift");
				//sar eax, cl
				AB(0xD3);
				AB(0xF8);
			}
		}

		add_label("store/rc");
		//mov r.gpr[rA], eax
		AB(0xA3);
		AD(&r.gpr[rA]);
		if(Rc)
			add_setcr0();
	}
	add_label("end");
}

//Recompiles fdiv: frD = frA / frB (double precision), with
//add_set_fpscr_fprf() emitted before the result is stored.
//The record form (Rc) is not emulated and is rejected at recompile time.
//NOTE(review): the *_DISP8 macros presumably emit x87 load/divide/store
//instructions addressed relative to the base set up by SET_DISP8 - confirm.
void Recompiler::_fdiv() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(FPR_PS_D(frA) / FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FDIV_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

//Recompiles lfdx: loads the 64-bit value at (rA_0 + r.gpr[rB]) into r.fpr[frD].
//Hardware path: Hardware::rd returns the QWORD in edx:eax, stored as
//hiword:loword. Direct path ("no_hardware"): the two 32-bit halves are read
//from host memory, byte-swapped individually and stored crosswise (the first
//word in memory becomes hiword), which amounts to a 64-bit byte swap.
//NOTE(review): add_mem1_x presumably leaves the address in eax and selects
//the path; add_mem2 presumably jumps to "end" - confirm.
void Recompiler::_lfdx() {
	D_frD; D_rA; D_rB;

	//r.fpr[frD].dword = m.rd(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax  //the address argument
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rd	//thiscall assumed
		AB(0xE8);
		QWORD (Hardware::*temp)(WORD) = &Hardware::rd;
		REQUEST_CALL(MAKE(DWORD, temp));
		//mov r.fpr[frD].loword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].loword);
		//mov r.fpr[frD].hiword, edx
		ADD_BYTE(0x89);
		AB(0x15);
		ADD_DWORD(&r.fpr[frD].hiword);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//r.fpr[frD].dword = swapd([eax]);
		//mov ebx, eax  //keep the address; eax is reused for the data
		AB(0x8B);
		AB(0xD8);

		//mov eax, [ebx]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x03);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//mov r.fpr[frD].hiword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].hiword);

		//mov eax, [ebx + 4]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x43);
		AB(4);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//mov r.fpr[frD].loword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].loword);
	}
	add_label("end");
}

//Recompiles fmul: frD = frA * frC (double precision), with
//add_set_fpscr_fprf() emitted before the result is stored.
//The record form (Rc) is not emulated and is rejected at recompile time.
//NOTE(review): the *_DISP8 macros presumably emit x87 load/multiply/store
//instructions addressed relative to the base set up by SET_DISP8 - confirm.
void Recompiler::_fmul() {
	D_frD; D_frA; D_frC; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(FPR_PS_D(frA) * FPR_PS_D(frC));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

//Recompiles divwu: r.gpr[rD] = r.gpr[rA] / r.gpr[rB] (unsigned).
//Division by zero stores 0 and, with OE, sets XER_OV and XER_SO; a valid
//division clears XER_OV when OE is set. XER is updated through its most
//significant byte (address &r.xer + 3). With Rc, an explicit
//`test eax, eax` precedes add_setcr0().
void Recompiler::_divwu() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;
	/*if(r.gpr[rB] == 0) {
	if(OE)
	r.set_overflow(true);
	r.gpr[rD] = 0;  //undefined, tested with GekkoTest
	} else {
	if(OE)
	r.set_overflow(false);
	r.gpr[rD] = r.gpr[rA] / r.gpr[rB];
	}*/

	//mov ebx, r.gpr[rB]
	AB(0x8B);
	AB(0x1D);
	AD(&r.gpr[rB]);
	//test ebx, ebx
	AB(0x85);
	AB(0xDB);
	//jnz nonzero_divisor:
	AB(0x75);
	request_label("nonzero_divisor");
	{ //zero_divisor:
		if(OE) {
			//setflags(xer, XER_OV | XER_SO, true);
			//or r.xer(byte 0/3), (XER_OV | XER_SO) >> 24
			ADD_BYTE(0x80);
			ADD_BYTE(0x0D);
			ADD_DWORD(DWORD(&r.xer) + 3);
			ADD_BYTE((XER_OV | XER_SO) >> 24);
		}
		//mov eax, 0
		AB(0xB8);
		AD(0);
		//jmp save
		ADD_BYTE(0xEB);
		request_label("save");
	}
	{
		add_label("nonzero_divisor");
		if(OE) {
			//setflags(xer, XER_OV, false);
			//and r.xer(byte 0/3), (~XER_OV) >> 24
			ADD_BYTE(0x80);
			ADD_BYTE(0x25);
			ADD_DWORD(DWORD(&r.xer) + 3);
			ADD_BYTE((~XER_OV) >> 24);
		}
		//xor edx, edx  //zero the upper half of the edx:eax dividend
		AB(0x31);
		AB(0xD2);
		//mov eax, r.gpr[rA]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rA]);
		//div dword ptr r.gpr[rB]  //memory operand (F7 /6 disp32); ebx holds the same value
		AB(0xF7);
		AB(0x35);
		AD(&r.gpr[rB]);
	}

	add_label("save");
	//mov r.gpr[rD], eax
	AB(0xA3);
	AD(&r.gpr[rD]);
	if(Rc) {
		//test eax, eax
		AB(0x85);
		AB(0xC0);
		add_setcr0();
	}
}

//Recompiles fmadd: frD = frA * frC + frB (double precision), with
//add_set_fpscr_fprf() emitted before the result is stored.
//The record form (Rc) is not emulated and is rejected at recompile time.
//NOTE(review): the multiply and the add are emitted as two separate x87
//operations by the macros below - confirm whether the intermediate rounding
//this implies is acceptable.
void Recompiler::_fmadd() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(FPR_PS_D(frA) * FPR_PS_D(frC) + FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FADD_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

//Recompiles crnor: CR bit crbD = !(CR bit crbA | CR bit crbB).
//r.cr is loaded into eax, the two source bits are tested with `bt`, the
//destination bit is set or cleared in eax, and eax is written back.
//Bit indices are PowerPC-numbered, hence 31 - n for the x86 bit index.
void Recompiler::_crnor() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), !(getbit(r.cr, crbA) || getbit(r.cr, crbB)));
	//mov eax, r.cr
	AB(0xA1);
	AD(&r.cr);
	//bt eax, 31-crbA
	AB(0x0F);
	AB(0xBA);
	AB(0xE0);
	AB((BYTE)31 - crbA);
	//jc clear
	AB(0x72);
	request_label("clear");
	//bt eax, 31-crbB
	AB(0x0F);
	AB(0xBA);
	AB(0xE0);
	AB((BYTE)31 - crbB);
	//jc clear
	AB(0x72);
	request_label("clear");
	{ //set: neither source bit was set
		//bts eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xE8);
		AB((BYTE)31 - crbD);
		//jmp store
		AB(0xEB);
		request_label("store");
	}
	{
		add_label("clear");
		//btr eax, 31-crbD
		AB(0x0F);
		AB(0xBA);
		AB(0xF0);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	//mov r.cr, eax
	AB(0xA3);
	AD(&r.cr);
}

//Recompiles nand: r.gpr[rA] = ~(r.gpr[rS] & r.gpr[rB]).
//With Rc, an explicit `test eax, eax` precedes add_setcr0() because `not`
//does not update the x86 flags.
void Recompiler::_nand() {
	D_rS; D_rA; D_rB; D_Rc;

	//r.gpr[rA] = ~(r.gpr[rS] & r.gpr[rB]);	//Optimizations are possible
	//mov eax, r.gpr[rS]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rS]);
	//and eax, r.gpr[rB]
	ADD_BYTE(0x23);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	//not eax
	AB(0xF7);
	AB(0xD0);
	//mov r.gpr[rA], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rA]);

	if(Rc) {
		//test eax, eax //To set eflags; not doesn't.
		AB(0x85);
		AB(0xC0);

		add_setcr0();
	}
}

//Recompiles nor: r.gpr[rA] = ~(r.gpr[rS] | r.gpr[rB]).
//With Rc, an explicit `test eax, eax` precedes add_setcr0() because `not`
//does not update the x86 flags.
void Recompiler::_nor() {
	D_rS; D_rA; D_rB; D_Rc;

	//r.gpr[rA] = ~(r.gpr[rS] | r.gpr[rB]);	//Optimizations are possible
	//mov eax, r.gpr[rS]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rS]);
	//or eax, r.gpr[rB]
	ADD_BYTE(0x0B);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	//not eax
	AB(0xF7);
	AB(0xD0);
	//mov r.gpr[rA], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rA]);

	if(Rc) {
		//test eax, eax //To set eflags; not doesn't.
		AB(0x85);
		AB(0xC0);

		add_setcr0();
	}
}

//Recompiles fadd: frD = frA + frB (double precision), with
//add_set_fpscr_fprf() emitted before the result is stored.
//The record form (Rc) is not emulated and is rejected at recompile time.
//NOTE(review): the *_DISP8 macros presumably emit x87 load/add/store
//instructions addressed relative to the base set up by SET_DISP8 - confirm.
void Recompiler::_fadd() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(FPR_PS_D(frA) + FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FADD_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

//Recompiles addc: r.gpr[rD] = r.gpr[rA] + r.gpr[rB], with XER[CA] set from
//the x86 carry of the addition. Optional overflow (OE) and CR0 (Rc) updates
//are emitted before add_setxerca_by_cf(), so those helpers are relied on to
//keep CF intact.
void Recompiler::_addc() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;

	//setflags(r.xer, XER_CA, carry(r.gpr[rA], r.gpr[rB]));
	//r.gpr[rD] = r.gpr[rA] + r.gpr[rB];
	if(rD == rA || rD == rB) {  //Optimization: add the other operand into rD in place
		//NOP if(rD == rA && rD == rB && !Rc && !OE). Unlikely. Ignored.
		//mov eax, r.gpr[rD == rA ? rB : rA]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rD == rA ? rB : rA]);
		//add r.gpr[rD], eax
		AB(0x01);
		AB(0x05);
		AD(&r.gpr[rD]);
	} else {
		//mov eax, r.gpr[rA]
		ADD_BYTE(0xA1);
		ADD_DWORD(&r.gpr[rA]);
		//add eax, r.gpr[rB]
		ADD_BYTE(0x03);
		ADD_BYTE(0x05);
		ADD_DWORD(&r.gpr[rB]);
		//mov r.gpr[rD], eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.gpr[rD]);
	}

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Recompiles fmsub: frD = frA * frC - frB (double precision), with
//add_set_fpscr_fprf() emitted before the result is stored.
//The record form (Rc) is not emulated and is rejected at recompile time.
//NOTE(review): the multiply and the subtract are emitted as two separate x87
//operations by the macros below - confirm whether the intermediate rounding
//this implies is acceptable.
void Recompiler::_fmsub() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(FPR_PS_D(frA) * FPR_PS_D(frC) - FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FSUB_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

//Recompiles fmadds: frD = frA * frC + frB, stored as a single-precision
//result through SET_FPR_SINGLE_DISP8, with add_set_fpscr_fprf() emitted
//before the store.
//The record form (Rc) is not emulated and is rejected at recompile time.
void Recompiler::_fmadds() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(FPR_PS_D(frA) * FPR_PS_D(frC) + FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FADD_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

//Recompiles fdivs: frD = frA / frB, stored as a single-precision result
//through SET_FPR_SINGLE_DISP8, with add_set_fpscr_fprf() emitted before the
//store.
//The record form (Rc) is not emulated and is rejected at recompile time.
void Recompiler::_fdivs() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(FPR_PS_D(frA) / FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FDIV_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

//Recompiles fmsubs: frD = frA * frC - frB, stored as a single-precision
//result through SET_FPR_SINGLE_DISP8, with add_set_fpscr_fprf() emitted
//before the store.
//The record form (Rc) is not emulated and is rejected at recompile time.
void Recompiler::_fmsubs() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(FPR_PS_D(frA) * FPR_PS_D(frC) - FPR_PS_D(frB));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FSUB_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

//Recompiles subfze: r.gpr[rD] = ~r.gpr[rA] + XER[CA].
//XER[CA] is copied into the x86 carry flag first; the following `mov` and
//`not` leave CF untouched, so `adc eax, 0` still sees it. Optional overflow
//(OE) and CR0 (Rc) updates are emitted before add_setxerca_by_cf(), so those
//helpers are relied on to keep CF intact.
void Recompiler::_subfze() {
	D_rD; D_rA; D_OE; D_Rc;

	//r.gpr[rD] = ~r.gpr[rA] + XER_CA;
	//bt r.xer, 31-2  //is bit 2 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31-2);
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//not eax
	AB(0xF7);
	AB(0xD0);
	//adc eax, 0  //using CF from bt
	AB(0x83);
	AB(0xD0);
	AB(0);
	//mov r.gpr[rD], eax
	AB(0xA3);
	AD(&r.gpr[rD]);

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Recompiles subfme: r.gpr[rD] = ~r.gpr[rA] + XER[CA] + 0xFFFFFFFF.
//XER[CA] is copied into the x86 carry flag first; the following `mov` and
//`not` leave CF untouched, so `adc eax, -1` still sees it. Optional overflow
//(OE) and CR0 (Rc) updates are emitted before add_setxerca_by_cf(), so those
//helpers are relied on to keep CF intact.
void Recompiler::_subfme() {
	D_rD; D_rA; D_OE; D_Rc;

	//r.gpr[rD] = ~r.gpr[rA] + XER_CA + 0xFFFFFFFF;
	//bt r.xer, 31-2  //is bit 2 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31-2);
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//not eax
	AB(0xF7);
	AB(0xD0);
	//adc eax, -1  //using CF from bt; the imm8 is sign-extended
	AB(0x83);
	AB(0xD0);
	AB(-1);
	//mov r.gpr[rD], eax
	AB(0xA3);
	AD(&r.gpr[rD]);

	if(OE)
		add_setoverflow();
	if(Rc)
		add_setcr0();

	add_setxerca_by_cf();
}

//Emits host code for divw: signed 32-bit division rD = rA / rB.
//Divide-by-zero and 0x80000000 / -1 are routed to the "undefined_operation"
//path, which yields -1 for a negative dividend and 0 otherwise; all other
//inputs go through "defined_operation" and a host idiv. Both paths meet at
//"save". With OE the XER overflow bits are updated; with Rc, CR0 is set from
//the result.
void Recompiler::_divw() {
	D_rD; D_rA; D_rB; D_OE; D_Rc;
	//Reference implementation that the emitted code mirrors:
	/*if((r.gpr[rA] == 0x80000000 && r.gpr[rB] == -1) || r.gpr[rB] == 0) {
	if(OE)
	r.set_overflow(true);
	//undefined, tested with GekkoTest
	if(r.gpr[rA] & 0x80000000)
	r.gpr[rD] = (DWORD)-1;
	else
	r.gpr[rD] = 0;
	} else {
	if(OE)
	r.set_overflow(false);
	r.gpr[rD] = int(r.gpr[rA]) / int(r.gpr[rB]);
	}*/

	//mov eax, r.gpr[rA]
	ADD_BYTE(0xA1);
	ADD_DWORD(&r.gpr[rA]);
	//mov ebx, r.gpr[rB]
	AB(0x8B);
	AB(0x1D);
	AD(&r.gpr[rB]);
	//test ebx, ebx
	AB(0x85);
	AB(0xDB);
	//jz undefined_operation  //division by zero
	AB(0x74);
	request_label("undefined_operation");
	//cmp ebx, -1
	AB(0x83);
	AB(0xFB);
	AB(-1);
	//jne defined_operation
	AB(0x75);
	request_label("defined_operation");
	//cmp eax, 0x80000000
	AB(0x3D);
	AD(0x80000000);
	//jne defined_operation  //otherwise fall through: 0x80000000 / -1
	AB(0x75);
	request_label("defined_operation");
	{
		add_label("undefined_operation");
		if(OE) {
			//setflags(xer, XER_OV | XER_SO, true);
			//or r.xer(byte 0/3), (XER_OV | XER_SO) >> 24
			ADD_BYTE(0x80);
			ADD_BYTE(0x0D);
			ADD_DWORD(DWORD(&r.xer) + 3);
			ADD_BYTE((XER_OV | XER_SO) >> 24);
		}
		//test eax, eax  //sign of the dividend selects the result
		AB(0x85);
		AB(0xC0);
		//jns set_zero
		AB(0x79);
		request_label("set_zero");
		{ //set_-1:
			//mov eax, -1
			AB(0xB8);
			AD(-1);
			//jmp save
			ADD_BYTE(0xEB);
			request_label("save");
		}
		{
			add_label("set_zero");
			//mov eax, 0
			AB(0xB8);
			AD(0);
		}
		//jmp save  //skip over the defined_operation code that follows
		ADD_BYTE(0xEB);
		request_label("save");
	}
	{
		add_label("defined_operation");
		if(OE) {
			//setflags(xer, XER_OV, false);
			//and r.xer(byte 0/3), (~XER_OV) >> 24
			ADD_BYTE(0x80);
			ADD_BYTE(0x25);
			ADD_DWORD(DWORD(&r.xer) + 3);
			ADD_BYTE((~XER_OV) >> 24);
		}
		//Sign-extend eax into edx:eax for idiv.
		//xor edx, edx
		AB(0x31);
		AB(0xD2);
		//bt eax, 31-0
		AB(0x0F);
		AB(0xBA);
		AB(0xE0);
		AB(31-0);
		//jnc div
		AB(0x73);
		request_label("div");
		{ //negative_dividend:
			//not edx
			AB(0xF7);
			AB(0xD2);
		}
		add_label("div");
		//idiv dword [r.gpr[rB]]  //memory-operand form; same value as was loaded into ebx
		AB(0xF7);
		AB(0x3D);
		AD(&r.gpr[rB]);
	}

	add_label("save");
	//mov r.gpr[rD], eax
	AB(0xA3);
	AD(&r.gpr[rD]);
	if(Rc) {
		//test eax, eax  //refresh host flags from the result before add_setcr0
		AB(0x85);
		AB(0xC0);
		add_setcr0();
	}
}

void Recompiler::_dcbi() {
	//Emits no host code for this opcode; the operands are not even decoded.
	//D_rA; D_rB;
}

void Recompiler::_icbi() {
	//Operands are not decoded (//D_rA; D_rB;). The emitted code loads the
	//current instruction address and raises the REC_ICBI fatal error.

	//mov eax, cia
	AB(0xB8); AD(cia);
	add_fatal_error(REC_ICBI);
	last_instruction_type = IT_ERROR;
}

void Recompiler::_mftb() {
	D_rD; D_spr;
	if(!r.mfspr_is_valid(spr) || !(spr == 268 || spr == 269))
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = (spr == 268) ? getTBL() : getTBU();
	//push edi
	AB(0x57);
	//mov ecx, this //the this pointer
	AB(0xB9);
	AD(this);
	//call (spr == 268) ? getTBL() : getTBU()  //thiscall assumed
	AB(0xE8);
	DWORD (Recompiler::*temp)() = (spr == 268) ? &Recompiler::getTBL : &Recompiler::getTBU;
	REQUEST_CALL(MAKE(DWORD, temp));
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
	//pop edi
	AB(0x5F);
}

void Recompiler::_lmw() {
	D_rD; D_rA; D_d16;
	if(rA >= rD) {
		throw bad_form_exception(bad_form_exception::Invalid);
	}

	//Reference semantics:
	/*DWORD EA = rA_0 + d;
	//alignment exceptions are not implemented
	while(rD < 32) {
	r.gpr[rD] = m.rw(EA);
	rD++;
	EA += 4;
	}*/

	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//Hardware accesses are not emulated word-by-word here; this path
		//raises a fatal error instead.
		//pop edi
		AB(0x5F);
		add_pos_fatal_error(REC_UNEMULATED_INSTRUCTION_FORM);
	}
	{
		add_label("no_hardware");
		//push edi
		AB(0x57);
		//cld
		AB(0xFC);
		//mov esi, eax  //source: the address left in eax
		AB(0x89); AB(0xC6);
		//mov edi, &r.gpr[rD]  //destination: first register to load
		AB(0xBF); AD(&r.gpr[rD]);
		//mov ecx, 32 - rD  //number of registers
		AB(0xB9); AD(32 - rD);

		add_label("loop1");
		//lodsd	//mov eax, [esi]; esi+=4;
		AB(0xAD);
		//bswap eax
		ADD_BYTE(0x0F); ADD_BYTE(0xC8);
		//stosd	//mov [edi], eax; edi+=4;
		AB(0xAB);
		//loop loop1
		AB(0xE2); request_label("loop1");

		//pop edi
		AB(0x5F);
	}
	add_label("end");
}

void Recompiler::_rfi() {
	//r.msr = ((0x8780FF73 & r.srr1) | ((~0x8780FF73) & r.msr)) & (~0x00040000);
	//mov eax, r.srr1
	AB(0xA1);
	AD(&r.srr1);
	//and eax, 0x8780FF73
	AB(0x25);
	AD(0x8780FF73);
	//mov ebx, eax
	AB(0x8B);
	AB(0xD8);
	//eax = r.getmsr()
	//push edi
	AB(0x57);
	{
		//mov ecx, &r //the this pointer
		AB(0xB9);
		AD(&r);
		//call r.getmsr  //thiscall assumed
		AB(0xE8);
		DWORD (RegistersBase::*temp)() const = &Registers::getmsr;
		REQUEST_CALL(MAKE(DWORD, temp));
	}
	//and eax, ~0x8780FF73
	AB(0x25);
	AD(~0x8780FF73);
	//or eax, ebx
	AB(0x0B);
	AB(0xC3);
	//and eax, ~0x00040000
	AB(0x25);
	AD(~0x00040000);
	//r.setmsr(eax)
	{
		//push eax
		AB(0x50);
		//mov ecx, &r //the this pointer
		AB(0xB9);
		AD(&r);
		//call r.setmsr  //thiscall assumed
		AB(0xE8);
		void (Registers::*temp)(DWORD) = &Registers::setmsr;
		REQUEST_CALL(MAKE(DWORD, temp));
	}
	//pop edi
	AB(0x5F);

	//mov eax, cia
	AB(0xB8);
	AD(cia);
	add_fatal_error(REC_RFI);
	last_instruction_type = IT_ERROR;
}

void Recompiler::_stmw() {
	D_rD; D_rA; D_d16;

	//Reference semantics (rD plays the role of rS here):
	/*DWORD EA = rA_0 + d;
	//alignment exceptions are not implemented
	while(rS <= 31) {
	m.ww(EA, r.gpr[rS]);
	rS++;
	EA += 4;
	}*/

	add_mem1_rA0d(rA, d, true);
	if(rA != 0) {
		//Hardware accesses are not emulated word-by-word here; this path
		//raises a fatal error instead.
		//pop edi
		AB(0x5F);
		add_pos_fatal_error(REC_UNEMULATED_INSTRUCTION_FORM);
	}
	{
		add_label("no_hardware");
		//push edi
		AB(0x57);
		//cld
		AB(0xFC);
		//mov edi, eax  //destination: the address left in eax
		AB(0x89); AB(0xC7);
		//mov esi, &r.gpr[rD]  //source: first register to store
		AB(0xBE); AD(&r.gpr[rD]);
		//mov ecx, 32 - rD  //number of registers
		AB(0xB9); AD(32 - rD);

		add_label("loop1");
		//lodsd	//mov eax, [esi]; esi+=4;
		AB(0xAD);
		//bswap eax
		ADD_BYTE(0x0F); ADD_BYTE(0xC8);
		//stosd	//mov [edi], eax; edi+=4;
		AB(0xAB);
		//loop loop1
		AB(0xE2); request_label("loop1");

		//pop edi
		AB(0x5F);
	}
}

void Recompiler::_orc() {
	D_rS; D_rA; D_rB; D_Rc;

	//Reference semantics: r.gpr[rA] = r.gpr[rS] | ~r.gpr[rB];
	//The complement is computed first, then OR-ed with rS (OR is commutative).

	//mov eax, r.gpr[rB]
	ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rB]);
	//not eax
	AB(0xF7); AB(0xD0);
	//or eax, r.gpr[rS]
	ADD_BYTE(0x0B); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rS]);
	//mov r.gpr[rA], eax
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);

	if(Rc) {
		add_setcr0();
	}
}

void Recompiler::_sthx() {
	D_rS; D_rA; D_rB;

	//m.wh(rA_0 + r.gpr[rB], (WORD)r.gpr[rS]);
	add_mem1_x(rA, rB, true);
	{
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wh	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, WORD) = &Hardware::wh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx ebx, (WORD)r.gpr[rS]
		AB(0x0F);
		AB(0xB7);
		AB(0x1D);
		AD(&r.gpr[rS]);
		//xchg bl, bh
		ADD_BYTE(0x86);
		ADD_BYTE(0xDF);
		//mov [eax], bx
		ADD_BYTE(0x66);
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");

}

void Recompiler::_lhzu() {
	D_rD; D_rA; D_d16;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = m.rh(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rh	//thiscall assumed
		AB(0xE8);
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, ax //assumed that the high bytes is zeroed. NOT SO.
		AB(0x0F);
		AB(0xB7);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, word [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB7);
		ADD_BYTE(0x00);
		//xchg al, ah
		ADD_BYTE(0x86);
		ADD_BYTE(0xC4);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

void Recompiler::_lhax() {
	D_rD; D_rA; D_rB;

	//r.gpr[rD] = (short)m.rh(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rh	//thiscall assumed
		AB(0xE8);
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ax, [eax]
		ADD_BYTE(0x66);
		AB(0x8B);
		ADD_BYTE(0x00);
		//xchg al, ah
		ADD_BYTE(0x86);
		ADD_BYTE(0xC4);
	}
	add_label("end");
	//movsx eax, ax
	AB(0x0F);
	AB(0xBF);
	AB(0xC0);
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
}

void Recompiler::_stfsx() {
	D_frS; D_rA; D_rB;

	//m.ww(rA_0 + r.gpr[rB], MAKE(DWORD, GET_FPR_SINGLE(frS)));
	FLD_PS_D(frS);

	add_mem1_x(rA, rB, true);
	if(rA != 0) {
		//push (float)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(4);	//sub esp, 4
		AB(0xD9); AB(0x1C); AB(0x24); //fstp dword [esp]
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		AB(0xD9); AB(0x18);	//fstp dword [eax]

		//mov ebx, [eax]
		AB(0x8B);
		AB(0x18);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		AB(0x89);
		AB(0x18);
	}
	add_label("end");
}

void Recompiler::_lfsx() {
	D_frD; D_rA; D_rB;

	//r.fpr[frD].d = MAKE(float, m.rw(rA_0 + r.gpr[rB]));
	add_mem1_x(rA, rB, false);
	{
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));
		//fld eax //not possible; we must go through memory

		add_mem2();
	}
	{
		add_label("no_hardware");
		//r.fpr[frD].d = MAKE(float, swapw([eax]));
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	//PS0(frD) = PS1(frD) = MAKE(float, eax);
	//fld eax //not possible; we must go through memory
	//mov [esp - 4], eax
	AB(0x89);
	AB(0x44);
	AB(0x24);
	AB(-4);
	//fld m32real [esp - 4]
	AB(0xD9);
	AB(0x44);
	AB(0x24);
	AB(-4);

	SET_FPR_SINGLE;
}

void Recompiler::_lfdu() {
	D_frD; D_rA; D_d16;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.fpr[frD].dword = m.rd(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	if(rA != 0) {
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rd	//thiscall assumed
		AB(0xE8);
		QWORD (Hardware::*temp)(WORD) = &Hardware::rd;
		REQUEST_CALL(MAKE(DWORD, temp));
		//mov r.fpr[frD].loword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].loword);
		//mov r.fpr[frD].hiword, edx
		ADD_BYTE(0x89);
		AB(0x15);
		ADD_DWORD(&r.fpr[frD].hiword);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//r.fpr[frD].dword = swapd([eax]);
		//mov ebx, eax
		AB(0x8B);
		AB(0xD8);

		//mov eax, [ebx]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x03);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//mov r.fpr[frD].hiword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].hiword);

		//mov eax, [ebx + 4]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x43);
		AB(4);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
		//mov r.fpr[frD].loword, eax
		ADD_BYTE(0xA3);
		ADD_DWORD(&r.fpr[frD].loword);
	}
	add_label("end");

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

void Recompiler::_dcbz() {
	//Emits no host code for this opcode.
}

void Recompiler::_dcbst() {
	//Emits no host code for this opcode.
}

void Recompiler::_lbzux() {
	D_rD; D_rA; D_rB;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = m.rb(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rb	//thiscall assumed
		AB(0xE8);
		BYTE (Hardware::*temp)(WORD) = &Hardware::rb;
		REQUEST_CALL(MAKE(DWORD, temp));
		//movzx eax, al //assumed that the high bytes is zeroed. NOT SO.
		AB(0x0F);
		AB(0xB6);
		AB(0xC0);

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx eax, byte [eax]
		ADD_BYTE(0x0F);
		ADD_BYTE(0xB6);
		ADD_BYTE(0x00);
	}
	add_label("end");
	if(rB == rD) {
		//DWORD temp = r.gpr[rB];
		//mov ebx, r.gpr[rB]
		AB(0x8B);
		AB(0x1D);
		AD(&r.gpr[rB]);
	}
	//mov r.gpr[rD], eax
	AB(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	if(rB == rD) {
		//r.gpr[rA] += temp;
		//add r.gpr[rA], ebx
		AB(0x01);
		AB(0x1D);
		AD(&r.gpr[rA]);
	} else {
		//r.gpr[rA] += r.gpr[rB];
		//mov eax, r.gpr[rB]
		AB(0xA1);
		ADD_DWORD(&r.gpr[rB]);
		//add r.gpr[rA], eax
		AB(0x01);
		AB(0x05);
		AD(&r.gpr[rA]);
	}
}

//Emits host code for psq_lu: a call to quantized_load_W0/W1(frD, rA + d, I)
//followed by the update r.gpr[rA] += d. rA == 0 is an invalid form.
void Recompiler::_psq_lu() {
	D_frD; D_rA; D_W; D_I; D_d20;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	/*if(!W) {
	quantized_load_W0(frD, rA_0 + d, I);
	} else {
	quantized_load_W1(frD, rA_0 + d, I);
	}*/
	//push edi
	AB(0x57);
	//push I  //arguments are pushed last-to-first
	AB(0x68);
	AD(I);
	//rA is known to be non-zero here (checked above), so the effective
	//address is always r.gpr[rA] + d; no "push d" special case is needed.
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	if(d != 0) {
		//add eax, d
		AB(0x05);
		AD(d);
	}
	//push eax
	AB(0x50);
	//push frD
	AB(0x68);
	AD(frD);
	//mov ecx, this //the this pointer
	AB(0xB9);
	AD(this);
	//call quantized_load	//thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_load_W1 : &Recompiler::quantized_load_W0;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

void Recompiler::_sthu() {
	D_rS; D_rA; D_d16;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.ww(r.gpr[rA] + d, r.gpr[rS]);
	add_mem1_rA0d(rA, d, true);
	{
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wh	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, WORD) = &Hardware::wh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//movzx ebx, (WORD)r.gpr[rS]
		AB(0x0F);
		AB(0xB7);
		AB(0x1D);
		AD(&r.gpr[rS]);
		//xchg bl, bh
		ADD_BYTE(0x86);
		ADD_BYTE(0xDF);
		//mov [eax], bx
		ADD_BYTE(0x66);
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

void Recompiler::_fnmadd() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc) {
		//The record form is rejected rather than recompiled.
		throw bad_form_exception(bad_form_exception::Unemulated);
	}

	//Reference semantics:
	//SET_FPRD_FPRF(-(FPR_PS_D(frA) * FPR_PS_D(frC) + FPR_PS_D(frB)));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);	//frA
	FMUL_PS_D_DISP8(frC);	// * frC
	FADD_PS_D_DISP8(frB);	// + frB
	//fchs  //negate the sum
	AB(0xD9); AB(0xE0);
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

void Recompiler::_creqv() {
	D_crbD; D_crbA; D_crbB;

	//Reference semantics:
	//setflags(r.cr, makeflag(crbD), getbit(r.cr, crbA) == getbit(r.cr, crbB));

	//mov eax, r.cr
	AB(0xA1); AD(&r.cr);
	if(crbA != crbB) {
		//bt eax, 31-crbA
		AB(0x0F); AB(0xBA); AB(0xE0); AB((BYTE)31 - crbA);
		//jc crbA == 1
		AB(0x72); request_label("crbA == 1");
		{ //crbA == 0: the bits are equal only if crbB is 0 too
			//bt eax, 31-crbB
			AB(0x0F); AB(0xBA); AB(0xE0); AB((BYTE)31 - crbB);
			//jc clear
			AB(0x72); request_label("clear");
			//jmp set
			AB(0xEB); request_label("set");
		}
		{ //crbA == 1: the bits are equal only if crbB is 1 too
			add_label("crbA == 1");
			//bt eax, 31-crbB
			AB(0x0F); AB(0xBA); AB(0xE0); AB((BYTE)31 - crbB);
			//jnc clear
			AB(0x73); request_label("clear");
			//otherwise falls through into set
		}
		{
			add_label("set");
			//bts eax, 31-crbD
			AB(0x0F); AB(0xBA); AB(0xE8); AB((BYTE)31 - crbD);
			//jmp store
			AB(0xEB); request_label("store");
		}
		{
			add_label("clear");
			//btr eax, 31-crbD
			AB(0x0F); AB(0xBA); AB(0xF0); AB((BYTE)31 - crbD);
		}
	} else {
		//Optimization: a bit always equals itself, so crbD is simply set.
		//Further optimizations for crbD == crbA and == crbB are possible.
		//bts eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xE8); AB((BYTE)31 - crbD);
	}

	add_label("store");
	//mov r.cr, eax
	AB(0xA3); AD(&r.cr);
}

//Emits host code for fabs (frD = |frB|) by clearing the sign bit (bit 31 of
//the high dword) of the double in place. Only the frD == frB form is
//emitted; the record form and frD != frB throw Unemulated.
void Recompiler::_fabs() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	/*//Alternative x87 implementation, kept for reference:
	//SET_FPRD(fabs(FPR_PS_D(frB)));
	FLD_PS_D(frB);
	//fabs
	AB(0xD9);
	AB(0xE1);

	FSTPD;*/

	//r.fpr[frD].hiword = r.fpr[frB].hiword & 0x7FFFFFFF;
	//r.fpr[frD].loword = r.fpr[frB].loword;
	if(frD == frB) {
		//btr r.fpr[frD].hiword, 31
		//0F BA /6 is BTR (bit test and RESET). The previous ModRM byte 0x3D
		//selected /7 = BTC, which toggles the sign bit and therefore negated
		//positive inputs instead of leaving them unchanged.
		AB(0x0F);
		AB(0xBA);
		AB(0x35);
		AD(&r.fpr[frD].hiword);
		AB(31);
	} else {
		throw bad_form_exception(bad_form_exception::Unemulated);
	}
}

void Recompiler::_mtsrin() {
	D_rS; D_rB;

	//Reference semantics: r.sr[getbitsw(r.gpr[rB], 0, 3)] = r.gpr[rS];

	//mov eax, r.gpr[rB]
	AB(0xA1); AD(&r.gpr[rB]);
	//shr eax, 28  //the top four bits become the array index
	AB(0xC1); AB(0xE8); AB(28);
	//mov ebx, r.gpr[rS]
	AB(0x8B); AB(0x1D); AD(&r.gpr[rS]);
	//mov [&r.sr + eax*4], ebx
	AB(0x89); AB(0x1C); AB(0x85); AD(&r.sr);
}

void Recompiler::_mfsr() {
	D_rD; D_SR;

	//Reference semantics: r.gpr[rD] = r.sr[SR];
	//The index is known at recompile time, so the address is baked in.

	//mov eax, r.sr[SR]
	AB(0xA1); AD(&r.sr[SR]);
	//mov r.gpr[rD], eax
	AB(0xA3); AD(&r.gpr[rD]);
}

void Recompiler::_mffs() {
	D_frD; D_Rc;
	if(Rc) {
		//The record form is rejected rather than recompiled.
		throw bad_form_exception(bad_form_exception::Unemulated);
	}

	//Low dword: r.fpr[frD].loword = r.fpscr;
	//mov eax, r.fpscr
	AB(0xA1); AD(&r.fpscr);
	//mov r.fpr[frD].loword, eax
	AB(0xA3); AD(&r.fpr[frD].loword);

	//High dword: r.fpr[frD].hiword = UNDEFINED_PPCWORD;
	//mov r.fpr[frD].hiword, UNDEFINED_PPCWORD
	AB(0xC7); AB(0x05); AD(&r.fpr[frD].hiword); AD(UNDEFINED_PPCWORD);
}

void Recompiler::_tlbie() {
	//Emits no host code for this opcode; the operand is not even decoded.
	//D_rB;
}

//This is dark magic. :} Deep understanding of PPC/IA32 memory structure required.
//Emits host code for stswi: stores n bytes (NB, or 32 when NB == 0) taken
//from consecutive GPRs starting at rS, most significant byte first, to the
//address produced by add_mem1_rA0d(rA, 0, true). Whole registers are copied
//with a lodsd/bswap/stosd loop; the remaining 1-3 bytes are copied one at a
//time with movsb while walking the last register downwards from its
//highest-addressed byte.
void Recompiler::_stswi() {
	D_rS; D_rA; D_NB;

	DWORD n = (NB == 0) ? 32 : NB;

	//while(n >= 4 && rS < 32) store 4 swapped bytes at a time
	add_mem1_rA0d(rA, 0, true);
	if(rA != 0) {
		//it's too much work to do hardware accesses for every word here
		//pop edi
		AB(0x5F);
		add_pos_fatal_error(REC_UNEMULATED_INSTRUCTION_FORM);
	}
	{
		add_label("no_hardware");
		//push edi
		AB(0x57);
		//cld
		AB(0xFC);
		//mov edi, eax  //destination: the address left in eax
		AB(0x89);
		AB(0xC7);

		if(n >= 4) {
			//mov esi, &r.gpr[rS]
			AB(0xBE);
			AD(&r.gpr[rS]);
			//mov ecx, n / 4  //number of whole registers
			AB(0xB9);
			AD(n / 4);

			add_label("loop1");
			//lodsd	//mov eax, [esi]; esi+=4;
			AB(0xAD);
			//bswap eax
			ADD_BYTE(0x0F);
			ADD_BYTE(0xC8);
			//stosd	//mov [edi], eax; edi+=4;
			AB(0xAB);

			//This can be optimized a little, but it is not worth the trouble.
			if(rS + ((n + 3) / 4) >= 32) {  //If there's wraparound
				//ecx holds this value right after r31 has been copied.
				//cmp ecx, (n / 4) - (31 - rS)
				AB(0x83);
				AB(0xF9);
				AB((n / 4) - (31 - rS));
				//jne nowrap
				AB(0x75);
				request_label("nowrap");
				//Wrap esi from just past r31 back to r0:
				//sub esi, 32*4
				//add esi, -128
				AB(0x83);
				AB(0xC6);
				AB(-128); //just within signed byte limits :)
			}

			add_label("nowrap");
			//loop loop1
			AB(0xE2);
			request_label("loop1");
		}
		if(n % 4 != 0) {  //The last few (1 to 3) bytes
			if(n < 4) { //esi hasn't been loaded, we gotta do it now.
				//mov esi, DWORD(&r.gpr[rS]) + 3
				AB(0xBE);
				AD(DWORD(&r.gpr[rS]) + 3);
			} else {
				//add esi, 3  //highest-addressed byte of the next register
				AB(0x83);
				AB(0xC6);
				AB(3);
			}
			//mov ecx, n % 4
			AB(0xB9);
			AD(n % 4);

			add_label("loop2");
			//movsb	//mov byte [edi], byte [esi]; esi+=1; edi+=1;
			AB(0xA4);
			//add esi, -2 //net effect is esi -= 1: the source is walked backwards
			AB(0x83);
			AB(0xC6);
			AB(-2);
			//loop loop2
			AB(0xE2);
			request_label("loop2");
		}
		//pop edi
		AB(0x5F);
	}
	add_label("end");
}

void Recompiler::_lwarx() {
	D_rD; D_rA; D_rB;

	//if(rA_0 + r.gpr[rB] % 4 != 0)
	//throw lazer_exception("Unaligned lwarx");

	//reserve_data = r.gpr[rD] = m.rw(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);
	//mov reserve_data, eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&reserve_data);

	//reserve_bit = true;
	//mov reserve_bit, 1
	AB(0xC6);
	AB(0x05);
	AD(&reserve_bit);
	AB(1);
}

//Emits host code for stwcx. (store word conditional).
//Emitted sequence: clear the CR0 nibble; if reserve_bit is clear, skip to
//"end"; otherwise clear reserve_bit and store r.gpr[rS] only if the word at
//the target address still equals reserve_data. On a successful store CR0[EQ]
//is set. Finally CR0[SO] is copied from XER[SO].
void Recompiler::_stwcx_() {
	D_rS; D_rA; D_rB;

	//Alignment is not checked:
	//if(rA_0 + r.gpr[rB] % 4 != 0)
	//throw lazer_exception("Unaligned stwcx.");

	//and r.cr(byte 3/0), 0x0F  //clear the four CR0 bits
	AB(0x80);
	AB(0x25);
	AD(DWORD(&r.cr) + 3);
	AB(0x0F);
	//if(reserve_bit) {	//temp
	//test reserve_bit, 0xFF
	AB(0xF6);
	AB(0x05);
	AD(&reserve_bit);
	AB(0xFF);
	//jz end  //no reservation: nothing is stored
	AB(0x74);
	request_label("end");
	{
		//reserve_bit = false;
		//mov reserve_bit, 0
		AB(0xC6);
		AB(0x05);
		AD(&reserve_bit);
		AB(0);

		/*if(reserve_data == m.rw(rA_0 + r.gpr[rB])) {
		m.ww(rA_0 + r.gpr[rB], r.gpr[rS]);
		setflags(r.cr, CR0_EQ, true);
		}*/
		add_mem1_x(rA, rB, true);
		{
			//Hardware path: read the current word through h.rw, compare it
			//with reserve_data, and write through h.ww only on a match.
			//push eax  //keep a copy of the address for after the call
			AB(0x50);

			//push eax  //argument for h.rw
			AB(0x50);
			//mov ecx, &h //the this pointer
			AB(0xB9);
			AD(&h);
			//call h.rw	//thiscall assumed
			AB(0xE8);
			DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
			REQUEST_CALL(MAKE(DWORD, temp));

			//cmp eax, reserve_data	//temp
			AB(0x3B);
			AB(0x05);
			AD(&reserve_data);
			//pop eax  //restore the address; pop does not change the flags
			AB(0x58);
			//jne mem2  //temp
			AB(0x75);
			request_label("mem2");
			{
				//push r.gpr[rS]
				AB(0xFF);
				AB(0x35);
				AD(&r.gpr[rS]);
				//push eax
				AB(0x50);
				//mov ecx, &h //the this pointer
				AB(0xB9);
				AD(&h);
				//call h.ww	//thiscall assumed
				AB(0xE8);
				//NOTE: this temp shadows the outer temp declared above.
				void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
				REQUEST_CALL(MAKE(DWORD, temp));

				//pop edi
				AB(0x5F);
				//jmp set_cr0eq
				AB(0xEB);
				request_label("set_cr0eq");
			}
			add_label("mem2");
			add_mem2();
		}
		{
			//Direct path: compare-and-store on the byte-swapped values with
			//a single cmpxchg.
			add_label("no_hardware");
			//setflags(cr, CR0_LT | CR0_GT | CR0_EQ | CR0_SO, false);
			//mov dl, r.cr(byte 3/0)
			AB(0x8A);
			AB(0x15);
			AD(DWORD(&r.cr) + 3);
			//and dl, 0x0F
			AB(0x80);
			AB(0xE2);
			AB(0x0F);
			//Earlier unconditional store, kept for reference:
			/*//temp
			//mov ebx, r.gpr[rS]
			ADD_BYTE(0x8B);
			ADD_BYTE(0x1D);
			ADD_DWORD(&r.gpr[rS]);
			//bswap ebx
			ADD_BYTE(0x0F);
			ADD_BYTE(0xCB);
			//mov [eax], ebx
			ADD_BYTE(0x89);
			ADD_BYTE(0x18);*/
			//mov ecx, eax	//temp  //cmpxchg needs eax for the comparand
			AB(0x8B);
			AB(0xC8);
			//mov eax, reserve_data
			AB(0xA1);
			AD(&reserve_data);
			//bswap eax
			AB(0x0F);
			AB(0xC8);
			//mov ebx, r.gpr[rS]
			ADD_BYTE(0x8B);
			ADD_BYTE(0x1D);
			ADD_DWORD(&r.gpr[rS]);
			//bswap ebx
			ADD_BYTE(0x0F);
			ADD_BYTE(0xCB);
			//cmpxchg [ecx], ebx  //stores ebx only if [ecx] == eax; ZF reports success
			AB(0x0F);
			AB(0xB1);
			AB(0x19);
			//jnz end  //mismatch: nothing stored, CR0[EQ] stays clear
			AB(0x75);
			request_label("end");
		}
	} //}
	{
		add_label("set_cr0eq");
		//setflags(cr, CR0_EQ, true);
		//or r.cr(byte 3/0), 0x20
		AB(0x80);
		AB(0x0D);
		AD(DWORD(&r.cr) + 3);
		AB(0x20);
	}
	add_label("end");

	//setflags(r.cr, CR0_SO, getflag(r.xer, XER_SO));
	//bt r.xer, 31  //is bit 0 in PPC
	AB(0x0F);
	AB(0xBA);
	AB(0x25);
	AD(&r.xer);
	AB(31);
	//jnc no_so
	AB(0x73);
	request_label("no_so");
	{ //so:
		//or r.cr(byte 3/0), 0x10
		AB(0x80);
		AB(0x0D);
		AD(DWORD(&r.cr) + 3);
		AB(0x10);
	}
	add_label("no_so");
}

void Recompiler::_lwzux() {
	D_rD; D_rA; D_rB;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = m.rw(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.rw	//thiscall assumed
		AB(0xE8);
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov eax, [eax]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x00);
		//bswap eax
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC8);
	}
	add_label("end");
	if(rB == rD) {
		//DWORD temp = r.gpr[rB];
		//mov ebx, r.gpr[rB]
		AB(0x8B);
		AB(0x1D);
		AD(&r.gpr[rB]);
	}
	//mov r.gpr[rD], eax
	ADD_BYTE(0xA3);
	ADD_DWORD(&r.gpr[rD]);

	if(rB == rD) {
		//r.gpr[rA] += temp;
		//add r.gpr[rA], ebx
		AB(0x01);
		AB(0x1D);
		AD(&r.gpr[rA]);
	} else {
		//r.gpr[rA] += r.gpr[rB];
		//mov eax, r.gpr[rB]
		AB(0xA1);
		ADD_DWORD(&r.gpr[rB]);
		//add r.gpr[rA], eax
		AB(0x01);
		AB(0x05);
		AD(&r.gpr[rA]);
	}
}

void Recompiler::_dcbt() {
	//Emits no host code for this opcode; the operands are not even decoded.
	//D_rA; D_rB;
}

//This is dark magic. :}
//Emits host code for lswi: loads n bytes (NB, or 32 when NB == 0) from the
//address produced by add_mem1_rA0d(rA, 0, false) into consecutive GPRs
//starting at rD, most significant byte first. Whole registers are filled
//with a lodsd/bswap/stosd loop; the remaining 1-3 bytes are written one at a
//time with movsb while walking the last register downwards from its
//highest-addressed byte, and the bytes of that register that were not
//loaded are then cleared with a mask.
void Recompiler::_lswi() {
	D_rD; D_rA; D_NB;

	DWORD n = (NB == 0) ? 32 : NB;

	//while(n >= 4 && rD < 32) load 4 swapped bytes at a time
	add_mem1_rA0d(rA, 0, false);
	if(rA != 0) {
		//it's too much work to do hardware accesses for every word here
		//pop edi
		AB(0x5F);
		add_pos_fatal_error(REC_UNEMULATED_INSTRUCTION_FORM);
	}
	{
		add_label("no_hardware");
		//push edi
		AB(0x57);
		//cld
		AB(0xFC);
		//mov esi, eax  //source: the address left in eax
		AB(0x89);
		AB(0xC6);

		if(n >= 4) {
			//mov edi, &r.gpr[rD]
			AB(0xBF);
			AD(&r.gpr[rD]);
			//mov ecx, n / 4  //number of whole registers
			AB(0xB9);
			AD(n / 4);

			add_label("loop1");
			//lodsd	//mov eax, [esi]; esi+=4;
			AB(0xAD);
			//bswap eax
			ADD_BYTE(0x0F);
			ADD_BYTE(0xC8);
			//stosd	//mov [edi], eax; edi+=4;
			AB(0xAB);

			//This can be optimized a little, but it is not worth the trouble.
			if(rD + ((n + 3) / 4) >= 32) {  //If there's wraparound
				//ecx holds this value right after r31 has been written.
				//cmp ecx, (n / 4) - (31 - rD)
				AB(0x83);
				AB(0xF9);
				AB((n / 4) - (31 - rD));
				//jne nowrap
				AB(0x75);
				request_label("nowrap");
				//Wrap edi from just past r31 back to r0:
				//sub edi, 32*4
				//add edi, -128
				AB(0x83);
				AB(0xC7);
				AB(-128); //just within signed byte limits
			}

			add_label("nowrap");
			//loop loop1
			AB(0xE2);
			request_label("loop1");
		}
		if(n % 4 != 0) {  //The last few (1 to 3) bytes
			if(n < 4) { //edi hasn't been loaded, we gotta do it now.
				//mov edi, DWORD(&r.gpr[rD]) + 3
				AB(0xBF);
				AD(DWORD(&r.gpr[rD]) + 3);
			} else {
				//add edi, 3  //highest-addressed byte of the next register
				AB(0x83);
				AB(0xC7);
				AB(3);
			}
			//mov ecx, n % 4
			AB(0xB9);
			AD(n % 4);

			add_label("loop2");
			//movsb	//mov byte [edi], byte [esi]; esi+=1; edi+=1;
			AB(0xA4);
			//add edi, -2 //net effect is edi -= 1: the destination is walked backwards
			AB(0x83);
			AB(0xC7);
			AB(-2);
			//loop loop2
			AB(0xE2);
			request_label("loop2");

			//this can probably be optimized too
			//Keep only the bytes just loaded in the partially filled register:
			//and r.gpr[(rD + (n/4)) % 32], makemaskw(0, (n % 4) * 8 - 1);
			AB(0x81);
			AB(0x25);
			AD(&r.gpr[(rD + (n/4)) % 32]);
			AD(makemaskw(0, (n % 4) * 8 - 1));
		}
		//pop edi
		AB(0x5F);
	}
	add_label("end");
}

void Recompiler::_dcbtst() {
	//Emits no host code for this opcode; the operands are not even decoded.
	//D_rA; D_rB;
}


void Recompiler::_rlwnm() {
	D_rS; D_rA; D_rB; D_MB; D_ME; D_Rc;

	//Reference semantics:
	//r.gpr[rA] = _rotl(r.gpr[rS], getbitsw(r.gpr[rB], 27, 31)) & makemaskw(MB, ME);

	//mov eax, r.gpr[rS]
	AB(0xA1); AD(&r.gpr[rS]);
	//mov cl, (BYTE)r.gpr[rB] //byte 0/3
	AB(0x8A); AB(0x0D); AD(&r.gpr[rB]);
	//and cl, 0x1F  //rotate count is the low five bits
	AB(0x80); AB(0xE1); AB(0x1F);
	//rol eax, cl
	AB(0xD3); AB(0xC0);
	//and eax, makemaskw(MB, ME)  //mask is computed at recompile time
	AB(0x25); AD(makemaskw(MB, ME));
	//mov r.gpr[rA], eax
	AB(0xA3); AD(&r.gpr[rA]);

	if(Rc) {
		add_setcr0();
	}
}

void Recompiler::_stwux() {
	D_rS; D_rA; D_rB;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.ww(r.gpr[rA] + r.gpr[rB], r.gpr[rS]);
	add_mem1_x(rA, rB, true);
	{
		//push r.gpr[rS]
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ebx, r.gpr[rS]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");

	//r.gpr[rA] += r.gpr[rB];
	//mov eax, r.gpr[rB]
	AB(0xA1);
	AD(&r.gpr[rB]);
	//add r.gpr[rA], eax
	AB(0x01);
	AB(0x05);
	AD(&r.gpr[rA]);
}

void Recompiler::_stwbrx() {
	D_rS; D_rA; D_rB;

	//m.ww(rA_0 + r.gpr[rB], swapw(r.gpr[rS]));
	add_mem1_x(rA, rB, true);
	{
		//mov ebx, r.gpr[rS]
		AB(0x8B);
		AB(0x1D);
		AD(&r.gpr[rS]);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//push ebx
		AB(0x53);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ebx, r.gpr[rS]
		ADD_BYTE(0x8B);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//mov [eax], ebx
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

void Recompiler::_mfsrin() {
	D_rD; D_rB;

	//Reference semantics: r.gpr[rD] = r.sr[getbitsw(r.gpr[rB], 0, 3)];

	//mov eax, r.gpr[rB]
	AB(0xA1); AD(&r.gpr[rB]);
	//shr eax, 28  //the top four bits become the array index
	AB(0xC1); AB(0xE8); AB(28);
	//mov ebx, [&r.sr + eax*4]
	AB(0x8B); AB(0x1C); AB(0x85); AD(&r.sr);
	//mov r.gpr[rD], ebx
	AB(0x89); AB(0x1D); AD(&r.gpr[rD]);
}

//Emits host code for fsel: frD = (frA >= 0.0) ? frC : frB.
//The selected register is copied as two raw dwords, so its bit pattern is
//preserved. A NaN in frA compares as unordered, which is "not >= 0.0", so it
//selects frB exactly like the reference expression below.
void Recompiler::_fsel() {
	D_frD; D_frA; D_frB; D_frC; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD(FPR_PS_D((FPR_PS_D(frA) >= 0.0) ? frC : frB));

	{
		//fldz
		AB(0xD9);
		AB(0xEE);
		//fcomp r.fpr[frA].d  //compares ST(0) = 0.0 against frA, then pops
		AB(0xDC);
		AB(0x1D);
		AD(&r.fpr[frA].d);
		//fnstsw ax
		AB(0xDF);
		AB(0xE0);
		//After fnstsw, ah holds C0 = 0x01, C2 = 0x04, C3 = 0x40:
		//  0.0 > frA  -> none set       (frA < 0.0: select frB)
		//  0.0 < frA  -> C0             (select frC)
		//  0.0 == frA -> C3             (select frC)
		//  unordered  -> C3 | C2 | C0   (frA is NaN: select frB)
		//A single "test ah, 0x45; jz less" would send the unordered case to
		//frC, so C2 is tested on its own first.
		//test ah, 0x04 //C2
		AB(0xF6);
		AB(0xC4);
		AB(0x04);
		//jnz less //unordered
		AB(0x75);
		request_label("less");
		//test ah, 0x41 //C3 | C0
		AB(0xF6);
		AB(0xC4);
		AB(0x41);
	}

	//jz less //!(frA >= 0.0)
	AB(0x74);
	request_label("less");
	{ //more_or_equal:
		//mov esi, &r.fpr[frC]
		AB(0xBE);
		AD(&r.fpr[frC]);
		//jmp copy
		AB(0xEB);
		request_label("copy");
	}
	{
		add_label("less");
		//mov esi, &r.fpr[frB]
		AB(0xBE);
		AD(&r.fpr[frB]);
	}

	add_label("copy");
	//cld
	AB(0xFC);
	//push edi
	AB(0x57);
	//mov edi, &r.fpr[frD]
	AB(0xBF);
	AD(&r.fpr[frD]);
	//movsd  //low dword
	AB(0xA5);
	//movsd  //high dword
	AB(0xA5);
	//pop edi
	AB(0x5F);
}

void Recompiler::_psq_l() {
	D_frD; D_rA; D_W; D_I; D_d20;

	/*if(!W) {
	quantized_load_W0(frD, rA_0 + d, I);
	} else {
	quantized_load_W1(frD, rA_0 + d, I);
	}*/
	//push edi
	AB(0x57);
	//push I
	AB(0x68);
	AD(I);
	if(rA == 0) {
		//push d
		AB(0x68);
		AD(d);
	} else {
		//mov eax, r.gpr[rA]
		AB(0xA1);
		AD(&r.gpr[rA]);
		if(d != 0) {
			//add eax, d
			AB(0x05);
			AD(d);
		}
		//push eax
		AB(0x50);
	}
	//push frD
	AB(0x68);
	AD(frD);
	//mov ecx, this //the this pointer
	AB(0xB9);
	AD(this);
	//call quantized_load	//thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_load_W1 : &Recompiler::quantized_load_W0;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);
}

void Recompiler::_psq_st() {
	D_frS; D_rA; D_W; D_I; D_d20;

	/*if(!W)
	quantized_store_W0(rA_0 + d, I, frS);
	else
	quantized_store_W1(rA_0 + d, I, frS);*/
	//push edi
	AB(0x57);
	//push frS
	AB(0x68);
	AD(frS);
	//push I
	AB(0x68);
	AD(I);
	if(rA == 0) {
		//push d
		AB(0x68);
		AD(d);
	} else {
		//mov eax, r.gpr[rA]
		AB(0xA1);
		AD(&r.gpr[rA]);
		if(d != 0) {
			//add eax, d
			AB(0x05);
			AD(d);
		}
		//push eax
		AB(0x50);
	}
	//mov ecx, this //the this pointer
	AB(0xB9);
	AD(this);
	//call W ? quantized_store_W1 : quantized_store_W0	//thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_store_W1 : &Recompiler::quantized_store_W0;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);
}

void Recompiler::_fcmpo() {
	D_crfD; D_frA; D_frB;

	//Compare frA with frB on the x87 stack and turn the outcome into a
	//4-bit condition value in al: 0x80 less, 0x40 greater, 0x20 equal,
	//0x10 unordered.
	FLD_PS_D(frB);  //ends up in ST(1)
	FLD_PS_D(frA);  //ends up in ST(0)
	//fucompp //Compare ST(0) with ST(1) and pop register stack twice
	AB(0xDA); AB(0xE9);
	//fnstsw ax
	AB(0xDF); AB(0xE0);
	//sahf  (C3, C2, C0) => ZF, PF, CF
	AB(0x9E);
	//jp unordered  //tested first
	AB(0x7A); request_label("unordered");
	//jc less
	AB(0x72); request_label("less");
	//jz equal
	AB(0x74); request_label("equal");
	{ //greater:
		//mov al, 0x40
		AB(0xB0); AB(0x40);
		//jmp store
		AB(0xEB); request_label("store");
	}
	{
		add_label("unordered");
		//mov al, 0x10
		AB(0xB0); AB(0x10);
		//jmp store
		AB(0xEB); request_label("store");
	}
	{
		add_label("less");
		//mov al, 0x80
		AB(0xB0); AB(0x80);
		//jmp store
		AB(0xEB); request_label("store");
	}
	{
		add_label("equal");
		//mov al, 0x20
		AB(0xB0); AB(0x20);
		//falls through into store
	}

	add_label("store");
	add_set_fpscr_fpcc_by_al();
	add_setal2cr(crfD);
}

//Emits code that copies PS0(frA) into PS0(frD) and PS1(frB) into PS1(frD).
//The Rc form is not emulated.
void Recompiler::_ps_merge01() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//PS0(frD) = PS0(frA);
	AB(0xDD); AB(0x05); AD(&PS0(frA));	//fld PS0(frA)
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for PS0(frD) = PS1(frA), PS1(frD) = PS0(frB).
//Both sources are loaded before either store, so frD may alias frA or frB.
//The Rc form is not emulated.
void Recompiler::_ps_merge10() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//Load phase: ST(1) = PS0(frB), ST(0) = PS1(frA)
	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)

	//Store phase
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code that copies both paired-single slots of frB into frD.
//The Rc form is not emulated.
void Recompiler::_ps_mr() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//PS0(frD) = PS0(frB);
	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code that stores FPR_PS_D(frS) as a 32-bit float at r.gpr[rA] + d
//and then adds d to r.gpr[rA]. rA == 0 is rejected as an invalid form.
//NOTE(review): add_mem1_rA0d/add_mem2 are defined elsewhere; they presumably
//emit the address computation and the branches to "no_hardware"/"end".
void Recompiler::_stfsu() {
	D_frS; D_rA; D_d16;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.ww(rA_0 + d, MAKE(DWORD, GET_FPR_SINGLE(frS)));
	FLD_PS_D(frS);

	add_mem1_rA0d(rA, d, true);
	{	//hardware path; rA != 0 is guaranteed by the form check above
		//push (float)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(4);	//sub esp, 4
		AB(0xD9); AB(0x1C); AB(0x24);	//fstp dword [esp]
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h  //the this pointer
		AB(0xE8);	//call h.ww  //thiscall assumed
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		AB(0xD9); AB(0x18);	//fstp dword [eax]

		//byte-swap the stored dword in place
		AB(0x8B); AB(0x18);	//mov ebx, [eax]
		ADD_BYTE(0x0F); ADD_BYTE(0xCB);	//bswap ebx
		AB(0x89); AB(0x18);	//mov [eax], ebx
	}
	add_label("end");

	//r.gpr[rA] += d;
	AB(0x81); AB(0x05); AD(&r.gpr[rA]); AD(d);	//add r.gpr[rA], d
}

//Emits code that stores FPR_PS_D(frS) as a 64-bit double at
//rA_0 + r.gpr[rB].
//The hardware-call sequence is emitted for every rA: with rA == 0 the
//address is still the run-time value r.gpr[rB], and the other indexed stores
//in this file (_stfiwx, _sthbrx) emit the sequence unconditionally too.
//NOTE(review): add_mem1_x/add_mem2 are defined elsewhere; they presumably
//emit the address computation and the branches to "no_hardware"/"end".
void Recompiler::_stfdx() {
	D_frS; D_rA; D_rB;

	//m.wd(rA_0 + r.gpr[rB], MAKE(QWORD, FPR_PS_D(frS)));
	FLD_PS_D(frS);

	add_mem1_x(rA, rB, true);
	{
		//push (double)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(8);	//sub esp, 8
		AB(0xDD); AB(0x1C); AB(0x24);	//fstp qword [esp]
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h  //the this pointer
		AB(0xE8);	//call h.wd  //thiscall assumed
		void (Hardware::*temp)(WORD, QWORD) = &Hardware::wd;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//[eax] = swapd(MAKE(QWORD, FPR_PS_D(frS)));
		AB(0xDD); AB(0x18);	//fstp qword [eax]

		AB(0x8B); AB(0x18);	//mov ebx, [eax]
		AB(0x8B); AB(0x48); AB(4);	//mov ecx, [eax + 4]
		ADD_BYTE(0x0F); ADD_BYTE(0xCB);	//bswap ebx
		ADD_BYTE(0x0F); ADD_BYTE(0xC9);	//bswap ecx
		AB(0x89); AB(0x58); AB(4);	//mov [eax + 4], ebx
		AB(0x89); AB(0x08);	//mov [eax], ecx
	}
	add_label("end");
}

void Recompiler::_stbux() {
	D_rS; D_rA; D_rB;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.wb(r.gpr[rA] + r.gpr[rB], (BYTE)r.gpr[rS]);
	add_mem1_x(rA, rB, true);
	{
		//push r.gpr[rS]  //arguments are expanded to 32 bits as they're passed
		AB(0xFF);
		AB(0x35);
		AD(&r.gpr[rS]);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wb	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, BYTE) = &Hardware::wb;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov bl, r.gpr[rS] //this should load the least-significant byte
		ADD_BYTE(0x8A);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.gpr[rS]);
		//mov [eax], bl
		ADD_BYTE(0x88);
		ADD_BYTE(0x18);
	}
	add_label("end");

	//r.gpr[rA] += r.gpr[rB];
	//mov eax, r.gpr[rB]
	AB(0xA1);
	AD(&r.gpr[rB]);
	//add r.gpr[rA], eax
	AB(0x01);
	AB(0x05);
	AD(&r.gpr[rA]);
}

//Emits code for the slot-wise product frD = frA * frC; FPRF is derived from
//the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_mul() {
	D_frD; D_frA; D_frC; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS0(frC));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) * PS1(frC);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for the slot-wise multiply-add frD = frA * frC + frB; FPRF is
//derived from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_madd() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS0(frC) + PS0(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	AB(0xDC); AB(0x43); ADD_DISP8(&PS0(frB));	//fadd PS0(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) * PS1(frC) + PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for PS0(frD) = PS0(frA) + PS1(frB), PS1(frD) = PS1(frC); FPRF is
//derived from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_sum0() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) + PS1(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frC);
	AB(0xDD); AB(0x05); AD(&PS1(frC));	//fld PS1(frC)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code computing 1.0 / sqrt(FPR_PS_D(frB)) on the x87 stack, updates
//FPRF from the result and stores it through FSTPD_DISP8.
//The Rc form is not emulated.
void Recompiler::_frsqrte() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//SET_FPRD_FPRF(1.0 / sqrt(FPR_PS_D(frB)));
	AB(0xD9); AB(0xE8);	//fld1  //becomes st1
	FLD_PS_D(frB);
	AB(0xD9); AB(0xFA);	//fsqrt  //becomes st0
	AB(0xDE); AB(0xF9);	//fdivp ST(1), ST(0)  //result becomes st0

	add_set_fpscr_fprf();

	FSTPD_DISP8;
}

//Emits code for the single-precision negated multiply-subtract
//-(frA * frC - frB); FPRF is updated from the result.
//The Rc form is not emulated.
void Recompiler::_fnmsubs() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(-(FPR_PS_D(frA) * FPR_PS_D(frC) - FPR_PS_D(frB)));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FSUB_PS_D_DISP8(frB);
	AB(0xD9); AB(0xE0);	//fchs
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

void Recompiler::__wc_debug() {
	//this->wc_debug();
	//push edi
	AB(0x57);
	//mov ecx, this
	AB(0xB9);
	AD(this);
	//call wc_debug //thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)() = &Recompiler::wc_debug;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);
}

//Emits code that multiplies both slots of frA by PS0(frC); FPRF is derived
//from the PS0 result. Both products are computed before either store, so frD
//may alias frC. The Rc form is not emulated.
void Recompiler::_ps_muls0() {
	D_frD; D_frA; D_frC; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//double temp = PS1(frA) * PS0(frC);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS0(frC));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = temp;
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code that multiplies both slots of frA by PS1(frC); FPRF is derived
//from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_muls1() {
	D_frD; D_frA; D_frC; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS1(frC));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) * PS1(frC);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for the slot-wise multiply-subtract frD = frA * frC - frB; FPRF
//is derived from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_msub() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS0(frC) - PS0(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	AB(0xDC); AB(0x63); ADD_DISP8(&PS0(frB));	//fsub PS0(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) * PS1(frC) - PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDC); AB(0x25); AD(&PS1(frB));	//fsub PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code that negates both slots of frB into frD. When source and
//destination are the same register, bit 31 of the dword at offset 4 of each
//double is toggled in place; otherwise the values go through fld/fchs/fstp.
//The Rc form is not emulated.
void Recompiler::_ps_neg() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//PS0(frD) = -PS0(frB);
	//PS1(frD) = -PS1(frB);
	if(frD != frB) {
		SET_DISP8;

		AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frB));	//fld PS0(frB)
		AB(0xD9); AB(0xE0);	//fchs
		AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

		AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
		AB(0xD9); AB(0xE0);	//fchs
		AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
	} else {
		//could be optimized to do only the high byte? no.
		//btc dword [&PS0(frD) + 4], 31
		AB(0x0F); AB(0xBA); AB(0x3D); AD(((DWORD)&PS0(frD)) + 4); AB(31);
		//btc dword [&PS1(frD) + 4], 31
		AB(0x0F); AB(0xBA); AB(0x3D); AD(((DWORD)&PS1(frD)) + 4); AB(31);
	}
}

//Emits code that loads a 32-bit float from r.gpr[rA] + d into frD (through
//SET_FPR_SINGLE) and then adds d to r.gpr[rA]. rA == 0 is rejected.
//NOTE(review): add_mem1_rA0d/add_mem2 are defined elsewhere; they presumably
//emit the address computation and the branches to "no_hardware"/"end".
void Recompiler::_lfsu() {
	D_frD; D_rA; D_d16;
	if(rA == 0)
		throw interp_fatal_exception("Invalid instruction form");

	//r.fpr[frD].d = MAKE(float, m.rw(rA_0 + d));
	add_mem1_rA0d(rA, d, false);
	{	//hardware path; rA != 0 is guaranteed by the form check above
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h  //the this pointer
		AB(0xE8);	//call h.rw  //thiscall assumed
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//eax = swapw([eax]);
		ADD_BYTE(0x8B); ADD_BYTE(0x00);	//mov eax, [eax]
		ADD_BYTE(0x0F); ADD_BYTE(0xC8);	//bswap eax
	}
	add_label("end");

	//PS0(frD) = PS1(frD) = MAKE(float, eax);
	//There is no "fld eax"; the value has to go through memory.
	AB(0x89); AB(0x44); AB(0x24); AB(-4);	//mov [esp - 4], eax
	AB(0xD9); AB(0x44); AB(0x24); AB(-4);	//fld m32real [esp - 4]

	SET_FPR_SINGLE;

	//r.gpr[rA] += d;
	AB(0x81); AB(0x05); AD(&r.gpr[rA]); AD(d);	//add r.gpr[rA], d
}

//eieio: no host code is emitted for this opcode.
void Recompiler::_eieio() {
	//intentionally empty
}

//Emits code for PS0(frD) = PS0(frA), PS1(frD) = PS0(frB).
//Both sources are loaded before either store, so frD may alias frA or frB.
//The Rc form is not emulated.
void Recompiler::_ps_merge00() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//Load phase: ST(1) = PS0(frB), ST(0) = PS0(frA)
	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)
	AB(0xDD); AB(0x05); AD(&PS0(frA));	//fld PS0(frA)

	//Store phase
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for PS0(frD) = PS1(frA), PS1(frD) = PS1(frB).
//The Rc form is not emulated.
void Recompiler::_ps_merge11() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//PS0(frD) = PS1(frA);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for the single-precision negated multiply-add
//-(frA * frC + frB); FPRF is updated from the result.
//The Rc form is not emulated.
void Recompiler::_fnmadds() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPR_SINGLE_FPRF(-(FPR_PS_D(frA) * FPR_PS_D(frC) + FPR_PS_D(frB)));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FADD_PS_D_DISP8(frB);
	AB(0xD9); AB(0xE0);	//fchs
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

//Emits code that multiplies both slots of frA by PS0(frC) and adds the
//matching slot of frB; FPRF is derived from the PS0 result.
//Both results are computed before either store, so frD may alias a source.
//The Rc form is not emulated.
void Recompiler::_ps_madds0() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//double temp = PS1(frA) * PS0(frC) + PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS0(frC) + PS0(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	AB(0xDC); AB(0x43); ADD_DISP8(&PS0(frB));	//fadd PS0(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = temp;
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code that multiplies both slots of frA by PS1(frC) and adds the
//matching slot of frB; FPRF is derived from the PS0 result.
//The Rc form is not emulated.
void Recompiler::_ps_madds1() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) * PS1(frC) + PS0(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDC); AB(0x43); ADD_DISP8(&PS0(frB));	//fadd PS0(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) * PS1(frC) + PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for the slot-wise sum frD = frA + frB; FPRF is derived from the
//PS0 result. The Rc form is not emulated.
void Recompiler::_ps_add() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) + PS0(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x43); ADD_DISP8(&PS0(frB));	//fadd PS0(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) + PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for the slot-wise difference frD = frA - frB; FPRF is derived
//from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_sub() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) - PS0(frB));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x63); ADD_DISP8(&PS0(frB));	//fsub PS0(frB)
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = PS1(frA) - PS1(frB);
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x25); AD(&PS1(frB));	//fsub PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits an x87 compare of PS0(frA) against PS0(frB), turns the outcome into a
//one-hot value in al (0x80 less, 0x40 greater, 0x20 equal, 0x10 unordered)
//and passes it to the FPSCR and CR helpers.
void Recompiler::_ps_cmpo0() {
	D_crfD; D_frA; D_frB;

	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)  //ends up in ST(1)
	AB(0xDD); AB(0x05); AD(&PS0(frA));	//fld PS0(frA)  //ends up in ST(0)
	AB(0xDA); AB(0xE9);	//fucompp  //compare ST(0) with ST(1), pop both
	AB(0xDF); AB(0xE0);	//fnstsw ax
	AB(0x9E);	//sahf  //(C3, C2, C0) => ZF, PF, CF
	AB(0x7A); request_label("unordered");	//jp unordered
	AB(0x72); request_label("less");	//jc less
	AB(0x74); request_label("equal");	//jz equal

	//greater (fall-through):
	AB(0xB0); AB(0x40);	//mov al, 0x40
	AB(0xEB); request_label("store");	//jmp store

	add_label("unordered");
	AB(0xB0); AB(0x10);	//mov al, 0x10
	AB(0xEB); request_label("store");	//jmp store

	add_label("less");
	AB(0xB0); AB(0x80);	//mov al, 0x80
	AB(0xEB); request_label("store");	//jmp store

	add_label("equal");
	AB(0xB0); AB(0x20);	//mov al, 0x20

	add_label("store");
	add_set_fpscr_fpcc_by_al();
	add_setal2cr(crfD);
}

//Emits an x87 compare of PS1(frA) against PS1(frB), turns the outcome into a
//one-hot value in al (0x80 less, 0x40 greater, 0x20 equal, 0x10 unordered)
//and passes it to the FPSCR and CR helpers.
void Recompiler::_ps_cmpo1() {
	D_crfD; D_frA; D_frB;

	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)  //ends up in ST(1)
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)  //ends up in ST(0)
	AB(0xDA); AB(0xE9);	//fucompp  //compare ST(0) with ST(1), pop both
	AB(0xDF); AB(0xE0);	//fnstsw ax
	AB(0x9E);	//sahf  //(C3, C2, C0) => ZF, PF, CF
	AB(0x7A); request_label("unordered");	//jp unordered
	AB(0x72); request_label("less");	//jc less
	AB(0x74); request_label("equal");	//jz equal

	//greater (fall-through):
	AB(0xB0); AB(0x40);	//mov al, 0x40
	AB(0xEB); request_label("store");	//jmp store

	add_label("unordered");
	AB(0xB0); AB(0x10);	//mov al, 0x10
	AB(0xEB); request_label("store");	//jmp store

	add_label("less");
	AB(0xB0); AB(0x80);	//mov al, 0x80
	AB(0xEB); request_label("store");	//jmp store

	add_label("equal");
	AB(0xB0); AB(0x20);	//mov al, 0x20

	add_label("store");
	add_set_fpscr_fpcc_by_al();
	add_setal2cr(crfD);
}

//Emits an x87 compare of PS0(frA) against PS0(frB), turns the outcome into a
//one-hot value in al (0x80 less, 0x40 greater, 0x20 equal, 0x10 unordered)
//and passes it to the FPSCR and CR helpers.
void Recompiler::_ps_cmpu0() {
	D_crfD; D_frA; D_frB;

	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)  //ends up in ST(1)
	AB(0xDD); AB(0x05); AD(&PS0(frA));	//fld PS0(frA)  //ends up in ST(0)
	AB(0xDA); AB(0xE9);	//fucompp  //compare ST(0) with ST(1), pop both
	AB(0xDF); AB(0xE0);	//fnstsw ax
	AB(0x9E);	//sahf  //(C3, C2, C0) => ZF, PF, CF
	AB(0x7A); request_label("unordered");	//jp unordered
	AB(0x72); request_label("less");	//jc less
	AB(0x74); request_label("equal");	//jz equal

	//greater (fall-through):
	AB(0xB0); AB(0x40);	//mov al, 0x40
	AB(0xEB); request_label("store");	//jmp store

	add_label("unordered");
	AB(0xB0); AB(0x10);	//mov al, 0x10
	AB(0xEB); request_label("store");	//jmp store

	add_label("less");
	AB(0xB0); AB(0x80);	//mov al, 0x80
	AB(0xEB); request_label("store");	//jmp store

	add_label("equal");
	AB(0xB0); AB(0x20);	//mov al, 0x20

	add_label("store");
	add_set_fpscr_fpcc_by_al();
	add_setal2cr(crfD);
}

//Emits an x87 compare of PS1(frA) against PS1(frB), turns the outcome into a
//one-hot value in al (0x80 less, 0x40 greater, 0x20 equal, 0x10 unordered)
//and passes it to the FPSCR and CR helpers.
void Recompiler::_ps_cmpu1() {
	D_crfD; D_frA; D_frB;

	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)  //ends up in ST(1)
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)  //ends up in ST(0)
	AB(0xDA); AB(0xE9);	//fucompp  //compare ST(0) with ST(1), pop both
	AB(0xDF); AB(0xE0);	//fnstsw ax
	AB(0x9E);	//sahf  //(C3, C2, C0) => ZF, PF, CF
	AB(0x7A); request_label("unordered");	//jp unordered
	AB(0x72); request_label("less");	//jc less
	AB(0x74); request_label("equal");	//jz equal

	//greater (fall-through):
	AB(0xB0); AB(0x40);	//mov al, 0x40
	AB(0xEB); request_label("store");	//jmp store

	add_label("unordered");
	AB(0xB0); AB(0x10);	//mov al, 0x10
	AB(0xEB); request_label("store");	//jmp store

	add_label("less");
	AB(0xB0); AB(0x80);	//mov al, 0x80
	AB(0xEB); request_label("store");	//jmp store

	add_label("equal");
	AB(0xB0); AB(0x20);	//mov al, 0x20

	add_label("store");
	add_set_fpscr_fpcc_by_al();
	add_setal2cr(crfD);
}

//Emits code computing the single-precision reciprocal 1.0 / FPR_PS_D(frB);
//FPRF is updated from the result. The Rc form is not emulated.
void Recompiler::_fres() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw interp_fatal_exception("Unemulated instruction mode");

	//SET_FPR_SINGLE_FPRF(1.0 / FPR_PS_D(frB));
	SET_DISP8;
	AB(0xD9); AB(0xE8);	//fld1
	FDIV_PS_D_DISP8(frB);
	add_set_fpscr_fprf();
	SET_FPR_SINGLE_DISP8;
}

//Emits code for the slot-wise negated multiply-add
//frD = -(frA * frC + frB); FPRF is derived from the PS0 result.
//The Rc form is not emulated.
void Recompiler::_ps_nmadd() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) =
	//-(PS0(frA) * PS0(frC) + PS0(frB)));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	AB(0xDC); AB(0x43); ADD_DISP8(&PS0(frB));	//fadd PS0(frB)
	//fchs is D9 E0. The previous DD E0 encodes "fucom st(0)", which left the
	//PS0 result un-negated.
	AB(0xD9); AB(0xE0);	//fchs

	add_set_fpscr_fprf();	//has to run before the result is popped off the stack

	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = -(PS1(frA) * PS1(frC) + PS1(frB));
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)
	AB(0xD9); AB(0xE0);	//fchs
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for the slot-wise negated multiply-subtract
//frD = -(frA * frC - frB); FPRF is derived from the PS0 result.
//The Rc form is not emulated.
void Recompiler::_ps_nmsub() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) =
	//-(PS0(frA) * PS0(frC) - PS0(frB)));
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x4B); ADD_DISP8(&PS0(frC));	//fmul PS0(frC)
	AB(0xDC); AB(0x63); ADD_DISP8(&PS0(frB));	//fsub PS0(frB)
	AB(0xD9); AB(0xE0);	//fchs
	add_set_fpscr_fprf();	//has to run before the result is popped off the stack
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = -(PS1(frA) * PS1(frC) - PS1(frB));
	AB(0xDD); AB(0x05); AD(&PS1(frA));	//fld PS1(frA)
	AB(0xDC); AB(0x0D); AD(&PS1(frC));	//fmul PS1(frC)
	AB(0xDC); AB(0x25); AD(&PS1(frB));	//fsub PS1(frB)
	AB(0xD9); AB(0xE0);	//fchs
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for PS0(frD) = PS0(frC), PS1(frD) = PS0(frA) + PS1(frB).
//Unlike the other paired-single ops here, FPRF is derived from the PS1 result.
//The Rc form is not emulated.
void Recompiler::_ps_sum1() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//double temp = PS0(frA) + PS1(frB);
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frA));	//fld PS0(frA)
	AB(0xDC); AB(0x05); AD(&PS1(frB));	//fadd PS1(frB)

	//PS0(frD) = PS0(frC);
	AB(0xDD); AB(0x43); ADD_DISP8(&PS0(frC));	//fld PS0(frC)
	AB(0xDD); AB(0x5B); ADD_DISP8(&PS0(frD));	//fstp PS0(frD)

	//r.set_fpscr_fprf(PS1(frD) = temp);	//NOTE!
	add_set_fpscr_fprf();	//has to run before the sum is popped off the stack
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code for r.gpr[rA] = ~(r.gpr[rS] ^ r.gpr[rB]); with Rc set, CR0 is
//updated from the result.
//An in-place variant for rA == rS || rA == rB (mov eax, other operand;
//xor r.gpr[rA], eax) used to exist here as disabled code; it had no NOT step.
void Recompiler::_eqv() {
	D_rS; D_rA; D_rB; D_Rc;

	ADD_BYTE(0xA1); ADD_DWORD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
	ADD_BYTE(0x33); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//xor eax, r.gpr[rB]
	AB(0xF7); AB(0xD0);	//not eax
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rA]);	//mov r.gpr[rA], eax

	if(!Rc)
		return;

	//can be optimized by using a different setcr0.
	AB(0x85); AB(0xC0);	//test eax, eax
	add_setcr0();
}

//Emits code that loads a halfword from r.gpr[rA] + d, sign-extends it into
//r.gpr[rD] and then adds d to r.gpr[rA]. rA == 0 and rA == rD are rejected
//as invalid forms.
//NOTE(review): add_mem1_rA0d/add_mem2 are defined elsewhere; they presumably
//emit the address computation and the branches to "no_hardware"/"end".
void Recompiler::_lhau() {
	D_rD; D_rA; D_d16;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = (short)m.rh(rA_0 + d);
	add_mem1_rA0d(rA, d, false);
	{	//hardware path; rA != 0 is guaranteed by the form check above
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h  //the this pointer
		AB(0xE8);	//call h.rh  //thiscall assumed
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		ADD_BYTE(0x66); AB(0x8B); ADD_BYTE(0x00);	//mov ax, [eax]
		ADD_BYTE(0x86); ADD_BYTE(0xC4);	//xchg al, ah
	}
	add_label("end");
	AB(0x0F); AB(0xBF); AB(0xC0);	//movsx eax, ax
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax

	//r.gpr[rA] += d;
	AB(0x81); AB(0x05); AD(&r.gpr[rA]); AD(d);	//add r.gpr[rA], d
}

//Emits code that loads a halfword from r.gpr[rA] + r.gpr[rB], sign-extends
//it into r.gpr[rD] and then adds r.gpr[rB] to r.gpr[rA]. rA == 0 and
//rA == rD are rejected as invalid forms.
//NOTE(review): add_mem1_x/add_mem2 are defined elsewhere; they presumably
//emit the address computation and the branches to "no_hardware"/"end".
void Recompiler::_lhaux() {
	D_rD; D_rA; D_rB;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.gpr[rD] = (short)m.rh(rA_0 + r.gpr[rB]);
	add_mem1_x(rA, rB, false);
	{	//hardware path; rA != 0 is guaranteed by the form check above
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h  //the this pointer
		AB(0xE8);	//call h.rh  //thiscall assumed
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		ADD_BYTE(0x66); AB(0x8B); ADD_BYTE(0x00);	//mov ax, [eax]
		ADD_BYTE(0x86); ADD_BYTE(0xC4);	//xchg al, ah
	}
	add_label("end");
	if(rB == rD) {
		//The load below overwrites r.gpr[rB]; keep the index in ebx first.
		//DWORD temp = r.gpr[rB];
		AB(0x8B); AB(0x1D); AD(&r.gpr[rB]);	//mov ebx, r.gpr[rB]
	}
	AB(0x0F); AB(0xBF); AB(0xC0);	//movsx eax, ax
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax

	if(rB == rD) {
		//r.gpr[rA] += temp;
		AB(0x01); AB(0x1D); AD(&r.gpr[rA]);	//add r.gpr[rA], ebx
	} else {
		//r.gpr[rA] += r.gpr[rB];
		AB(0xA1); ADD_DWORD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
		AB(0x01); AB(0x05); AD(&r.gpr[rA]);	//add r.gpr[rA], eax
	}
}

//Emits code for the negated multiply-subtract -(frA * frC - frB); FPRF is
//updated from the result, which is stored through FSTPD_DISP8.
//The Rc form is not emulated.
void Recompiler::_fnmsub() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//SET_FPRD_FPRF(-(FPR_PS_D(frA) * FPR_PS_D(frC) - FPR_PS_D(frB)));
	SET_DISP8;
	FLD_PS_D_DISP8(frA);
	FMUL_PS_D_DISP8(frC);
	FSUB_PS_D_DISP8(frB);
	AB(0xD9); AB(0xE0);	//fchs
	add_set_fpscr_fprf();
	FSTPD_DISP8;
}

//Emits a call to quantized_store_W0/W1 (picked by W) with the arguments
//(r.gpr[rA] + d, I, frS), pushed right to left, and then adds d to r.gpr[rA].
//rA == 0 is rejected as an invalid form, so no constant-address variant is
//needed here.
void Recompiler::_psq_stu() {
	D_frS; D_rA; D_W; D_I; D_d20;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	/*if(!W)
	quantized_store_W0(r.gpr[rA] + d, I, frS);
	else
	quantized_store_W1(r.gpr[rA] + d, I, frS);*/
	AB(0x57);	//push edi
	AB(0x68); AD(frS);	//push frS
	AB(0x68); AD(I);	//push I
	AB(0xA1); AD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
	if(d != 0) {
		AB(0x05); AD(d);	//add eax, d
	}
	AB(0x50);	//push eax
	AB(0xB9); AD(this);	//mov ecx, this  //the this pointer
	AB(0xE8);	//call W ? quantized_store_W1 : quantized_store_W0  //thiscall assumed
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_store_W1 : &Recompiler::quantized_store_W0;
	REQUEST_CALL(MAKE(DWORD, temp));
	AB(0x5F);	//pop edi

	//r.gpr[rA] += d;
	AB(0x81); AB(0x05); AD(&r.gpr[rA]); AD(d);	//add r.gpr[rA], d
}

//Emits code computing 1.0 / sqrt(x) for both slots of frB into frD; FPRF is
//derived from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_rsqrte() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//r.set_fpscr_fprf(PS0(frD) = 1.0 / sqrt(PS0(frB)));
	AB(0xD9); AB(0xE8);	//fld1  //becomes st1
	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)
	AB(0xD9); AB(0xFA);	//fsqrt  //becomes st0
	AB(0xDE); AB(0xF9);	//fdivp ST(1), ST(0)  //result becomes st0
	add_set_fpscr_fprf();
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = 1.0 / sqrt(PS1(frB));
	AB(0xD9); AB(0xE8);	//fld1  //becomes st1
	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
	AB(0xD9); AB(0xFA);	//fsqrt  //becomes st0
	AB(0xDE); AB(0xF9);	//fdivp ST(1), ST(0)  //result becomes st0
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code computing the reciprocal 1.0 / x for both slots of frB into frD;
//FPRF is derived from the PS0 result. The Rc form is not emulated.
void Recompiler::_ps_res() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//r.set_fpscr_fprf(PS0(frD) = 1.0 / PS0(frB));
	AB(0xD9); AB(0xE8);	//fld1
	AB(0xDC); AB(0x35); AD(&PS0(frB));	//fdiv PS0(frB)
	add_set_fpscr_fprf();
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = 1.0 / PS1(frB);
	AB(0xD9); AB(0xE8);	//fld1
	AB(0xDC); AB(0x35); AD(&PS1(frB));	//fdiv PS1(frB)
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//Emits code that loads cia into eax and raises the REC_OOVPA fatal error;
//the instruction is flagged as IT_ERROR.
void Recompiler::__oovpa() {
	//Interpreter equivalent:
	//  DWORD i = getbitsw(opcode, 6, 31);
	//  SET_FETCH(generic_oovpa_func(cia, i));
	AB(0xB8); AD(cia);	//mov eax, cia
	add_fatal_error(REC_OOVPA);
	last_instruction_type = IT_ERROR;
}

void Recompiler::__gcm_dol_loaded() {
	//this->do_oovpa();
	//push edi
	AB(0x57);
	//mov ecx, this
	AB(0xB9);
	AD(this);
	//call do_oovpa //thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)() = &Recompiler::do_oovpa;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);
}

//Emits code that copies the top four bits of r.xer into CR field crfD and
//then clears those four bits in r.xer.
void Recompiler::_mcrxr() {
	D_crfD;

	//r.setcr(crfD, getbitsw(r.xer, 0, 3));
	AB(0xA0); AD(DWORD(&r.xer) + 3);	//mov al, HIBYTE(r.xer)
	AB(0x24); AB(0xF0);	//and al, 0xF0

	add_setal2cr(crfD);

	//r.xer &= 0x0FFFFFFF;
	AB(0x80); AB(0x25); AD(DWORD(&r.xer) + 3); AB(0x0F);	//and HIBYTE(r.xer), 0x0F
}

void Recompiler::_stfiwx() {
	D_frS; D_rA; D_rB;

	//m.ww(rA_0 + r.gpr[rB], r.fpr[frS].loword);
	add_mem1_x(rA, rB, true);
	{
		//push r.fpr[frS].loword
		AB(0xFF);
		AB(0x35);
		AD(&r.fpr[frS].loword);
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.ww	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, DWORD) = &Hardware::ww;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//mov ebx, r.fpr[frS].loword
		ADD_BYTE(0x8B);
		ADD_BYTE(0x1D);
		ADD_DWORD(&r.fpr[frS].loword);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//mov [eax], ebx
		ADD_BYTE(0x89);
		ADD_BYTE(0x18);
	}
	add_label("end");
}

//Emits code that converts FPR_PS_D(frB) to a 32-bit integer in
//r.fpr[frD].loword and sets r.fpr[frD].hiword to UNDEFINED_PPCWORD.
//The Rc form is not emulated.
void Recompiler::_fctiw() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//Original author's note: MSVC was first thought to round to zero here,
	//then that assumption was withdrawn.
	//NOTE(review): fistp converts using the current x87 rounding mode.
	SET_DISP8;

	//r.fpr[frD].loword = (int)FPR_PS_D(frB);
	FLD_PS_D_DISP8(frB);
	AB(0xDB); AB(0x5B); ADD_DISP8(&r.fpr[frD].loword);	//fistp r.fpr[frD].loword

	//r.fpr[frD].hiword = UNDEFINED_PPCWORD;
	//mov r.fpr[frD].hiword, UNDEFINED_PPCWORD
	AB(0xC7); AB(0x43); ADD_DISP8(&r.fpr[frD].hiword); AD(UNDEFINED_PPCWORD);
}

//Emits code that stores the low halfword of r.gpr[rS], byte-reversed, at
//rA_0 + r.gpr[rB].
//The hardware path goes through the halfword writer Hardware::wh, matching
//the m.wh pseudo-code; it previously called the 32-bit writer Hardware::ww.
//NOTE(review): Hardware::wh(WORD, WORD) is inferred from the sibling
//rh/wb/ww/wd members used in this file -- confirm against hardware.h.
void Recompiler::_sthbrx() {
	D_rS; D_rA; D_rB;

	//m.wh(rA_0 + r.gpr[rB], swaph((WORD)r.gpr[rS]));
	add_mem1_x(rA, rB, true);
	{
		AB(0x0F); AB(0xB7); AB(0x1D); AD(&r.gpr[rS]);	//movzx ebx, (WORD)r.gpr[rS]
		ADD_BYTE(0x86); ADD_BYTE(0xDF);	//xchg bl, bh
		AB(0x53);	//push ebx
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h  //the this pointer
		AB(0xE8);	//call h.wh  //thiscall assumed
		void (Hardware::*temp)(WORD, WORD) = &Hardware::wh;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//The halfword is written to [eax] as-is; no xchg is emitted here.
		AB(0x0F); AB(0xB7); AB(0x1D); AD(&r.gpr[rS]);	//movzx ebx, (WORD)r.gpr[rS]
		ADD_BYTE(0x66); ADD_BYTE(0x89); ADD_BYTE(0x18);	//mov [eax], bx
	}
	add_label("end");
}

void Recompiler::__osreport() {
	//this->osreport();
	//push edi
	AB(0x57);
	//mov ecx, this
	AB(0xB9);
	AD(this);
	//call osreport //thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)() = &Recompiler::osreport;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);
}

void Recompiler::_stfdu() {
	D_frS; D_rA; D_d16;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.wd(r.gpr[rA] + d, MAKE(QWORD, FPR_PS_D(frS)));
	FLD_PS_D(frS);  //It's a performance gain to move this here, but I'm not sure how stable it is.

	add_mem1_rA0d(rA, d, true);
	{
		//push (double)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(8);	//sub esp, 8
		AB(0xDD); AB(0x1C); AB(0x24); //fstp qword [esp]
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wd	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, QWORD) = &Hardware::wd;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		AB(0xDD); AB(0x18);	//fstp qword [eax]

		//mov ebx, [eax]
		AB(0x8B);
		AB(0x18);
		//mov ecx, [eax + 4]
		AB(0x8B);
		AB(0x48);
		AB(4);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//bswap ecx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC9);
		//mov [eax + 4], ebx
		AB(0x89);
		AB(0x58);
		AB(4);
		//mov [eax], ecx
		AB(0x89);
		AB(0x08);
	}
	add_label("end");

	//r.gpr[rA] += d;
	//add r.gpr[rA], d
	AB(0x81);
	AB(0x05);
	AD(&r.gpr[rA]);
	AD(d);
}

//stfdux: store floating-point double with update, indexed.
//Emits code that writes FPR_PS_D(frS) as a 64-bit value to
//r.gpr[rA] + r.gpr[rB] and then adds r.gpr[rB] to r.gpr[rA].
//Throws bad_form_exception(Invalid) when rA == 0.
void Recompiler::_stfdux() {
	D_frS; D_rA; D_rB;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//m.wd(r.gpr[rA] + r.gpr[rB], MAKE(QWORD, FPR_PS_D(frS)));
	//The value is loaded before the address setup; each path below pops it with fstp.
	FLD_PS_D(frS);

	//Presumably leaves the target address in eax and branches to "no_hardware" for plain memory.
	add_mem1_x(rA, rB, true);
	{	//hardware path; rA != 0 is already guaranteed by the check above
		//push (double)FPR_PS_D(frS)
		AB(0x83); AB(0xEC); AB(8);	//sub esp, 8
		AB(0xDD); AB(0x1C); AB(0x24); //fstp qword [esp]
		//push eax
		AB(0x50);
		//mov ecx, &h //the this pointer
		AB(0xB9);
		AD(&h);
		//call h.wd	//thiscall assumed
		AB(0xE8);
		void (Hardware::*temp)(WORD, QWORD) = &Hardware::wd;
		REQUEST_CALL(MAKE(DWORD, temp));

		add_mem2();
	}
	{
		add_label("no_hardware");
		//[eax] = swapd(MAKE(QWORD, FPR_PS_D(frS)));
		AB(0xDD); AB(0x18);	//fstp qword [eax]

		//mov ebx, [eax]
		AB(0x8B);
		AB(0x18);
		//mov ecx, [eax + 4]
		AB(0x8B);
		AB(0x48);
		AB(4);
		//bswap ebx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xCB);
		//bswap ecx
		ADD_BYTE(0x0F);
		ADD_BYTE(0xC9);
		//mov [eax + 4], ebx
		AB(0x89);
		AB(0x58);
		AB(4);
		//mov [eax], ecx
		AB(0x89);
		AB(0x08);
	}
	add_label("end");

	//r.gpr[rA] += r.gpr[rB];
	//mov eax, r.gpr[rB]
	AB(0xA1);
	AD(&r.gpr[rB]);
	//add r.gpr[rA], eax
	AB(0x01);
	AB(0x05);
	AD(&r.gpr[rA]);
}

//mtfsb0: clear one bit of FPSCR.
//Emits a single btr on r.fpscr for x86 bit (31 - crbD).
//Throws bad_form_exception(Unemulated) for the record form (Rc) and for
//crbD == 1 or crbD == 2.
void Recompiler::_mtfsb0() {
	D_crbD; D_Rc;
	if(Rc || crbD == 1 || crbD == 2)	//crbD 1/2: invalid?
		throw bad_form_exception(bad_form_exception::Unemulated);

	//setflags(r.fpscr, makeflag(crbD), false);
	//btr r.fpscr, 31 - crbD
	AB(0x0F); AB(0xBA); AB(0x35);
	AD(&r.fpscr);
	AB(31 - crbD);
}

//dcbz_l: no code is emitted for this opcode; the recompiled block treats it as a no-op.
void Recompiler::_dcbz_l() {
}

//lwbrx: load word byte-reversed, indexed.
//Emits code that loads the word at rA_0 + r.gpr[rB] into r.gpr[rD] with its
//bytes reversed relative to a normal word load.
void Recompiler::_lwbrx() {
	D_rD; D_rA; D_rB;

	//r.gpr[rD] = swapw(m.rw(rA_0 + r.gpr[rB]));
	//Presumably leaves the address in eax and branches to "no_hardware" for plain memory.
	add_mem1_x(rA, rB, false);
	{	//hardware path: eax = bswap(h.rw(eax))
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h //the this pointer
		AB(0xE8);	//call h.rw	//thiscall assumed
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));

		ADD_BYTE(0x0F); ADD_BYTE(0xC8);	//bswap eax
		add_mem2();
	}
	{	//direct path: a raw load with no swap already gives the reversed byte order
		add_label("no_hardware");
		ADD_BYTE(0x8B); ADD_BYTE(0x00);	//mov eax, [eax]
	}
	add_label("end");
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
}

//lhbrx: load halfword byte-reversed, indexed.
//Emits code that loads the halfword at rA_0 + r.gpr[rB], zero-extended and
//with its two bytes exchanged, into r.gpr[rD].
void Recompiler::_lhbrx() {
	D_rD; D_rA; D_rB;

	//r.gpr[rD] = swaph(m.rh(rA_0 + r.gpr[rB]));
	//Presumably leaves the address in eax and branches to "no_hardware" for plain memory.
	add_mem1_x(rA, rB, false);
	{	//hardware path: eax = swaph(h.rh(eax))
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h //the this pointer
		AB(0xE8);	//call h.rh	//thiscall assumed
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));
		//The upper half of eax is not guaranteed to be zero after the call, so clear it.
		AB(0x0F); AB(0xB7); AB(0xC0);	//movzx eax, ax

		ADD_BYTE(0x86); ADD_BYTE(0xC4);	//xchg al, ah
		add_mem2();
	}
	{	//direct path: a raw zero-extending load, no byte exchange needed
		add_label("no_hardware");
		ADD_BYTE(0x0F); ADD_BYTE(0xB7); ADD_BYTE(0x00);	//movzx eax, word [eax]
	}
	add_label("end");
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax
}

//lfdux: load floating-point double with update, indexed.
//Emits code that loads the 64-bit value at r.gpr[rA] + r.gpr[rB] into
//r.fpr[frD] (loword/hiword) and then adds r.gpr[rB] to r.gpr[rA].
//Throws bad_form_exception(Invalid) when rA == 0.
void Recompiler::_lfdux() {
	D_frD; D_rA; D_rB;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//r.fpr[frD].dword = m.rd(r.gpr[rA] + r.gpr[rB]);
	//Presumably leaves the address in eax and branches to "no_hardware" for plain memory.
	add_mem1_x(rA, rB, false);
	{	//hardware path: edx:eax = h.rd(eax)
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h //the this pointer
		AB(0xE8);	//call h.rd	//thiscall assumed
		QWORD (Hardware::*temp)(WORD) = &Hardware::rd;
		REQUEST_CALL(MAKE(DWORD, temp));
		ADD_BYTE(0xA3); ADD_DWORD(&r.fpr[frD].loword);	//mov r.fpr[frD].loword, eax
		ADD_BYTE(0x89); AB(0x15); ADD_DWORD(&r.fpr[frD].hiword);	//mov r.fpr[frD].hiword, edx

		add_mem2();
	}
	{	//direct path: r.fpr[frD].dword = swapd([eax]);
		add_label("no_hardware");
		//eax is needed as scratch, so keep the pointer in ebx
		AB(0x8B); AB(0xD8);	//mov ebx, eax

		//first dword in memory becomes the high word
		ADD_BYTE(0x8B); ADD_BYTE(0x03);	//mov eax, [ebx]
		ADD_BYTE(0x0F); ADD_BYTE(0xC8);	//bswap eax
		ADD_BYTE(0xA3); ADD_DWORD(&r.fpr[frD].hiword);	//mov r.fpr[frD].hiword, eax

		//second dword in memory becomes the low word
		ADD_BYTE(0x8B); ADD_BYTE(0x43); AB(4);	//mov eax, [ebx + 4]
		ADD_BYTE(0x0F); ADD_BYTE(0xC8);	//bswap eax
		ADD_BYTE(0xA3); ADD_DWORD(&r.fpr[frD].loword);	//mov r.fpr[frD].loword, eax
	}
	add_label("end");

	//Update step: r.gpr[rA] += r.gpr[rB];
	AB(0xA1); AD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
	AB(0x01); AB(0x05); AD(&r.gpr[rA]);	//add r.gpr[rA], eax
}

//lfsux: load floating-point single with update, indexed.
//Emits code that reads the 32-bit value at r.gpr[rA] + r.gpr[rB], loads it
//onto the x87 stack as a float, hands it to SET_FPR_SINGLE and then adds
//r.gpr[rB] to r.gpr[rA].
//Throws bad_form_exception(Invalid) when rA == 0.
void Recompiler::_lfsux() {
	D_frD; D_rA; D_rB;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	//SET_FPR_SINGLE(MAKE(float, m.rw(r.gpr[rA] + r.gpr[rB])));
	//Presumably leaves the address in eax and branches to "no_hardware" for plain memory.
	add_mem1_x(rA, rB, false);
	{	//hardware path: eax = h.rw(eax)
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h //the this pointer
		AB(0xE8);	//call h.rw	//thiscall assumed
		DWORD (Hardware::*temp)(WORD) = &Hardware::rw;
		REQUEST_CALL(MAKE(DWORD, temp));
		//fld eax //not possible; we must go through memory

		add_mem2();
	}
	{	//direct path: eax = swapw([eax])
		add_label("no_hardware");
		//r.fpr[frD].d = MAKE(float, swapw([eax]));
		ADD_BYTE(0x8B); ADD_BYTE(0x00);	//mov eax, [eax]
		ADD_BYTE(0x0F); ADD_BYTE(0xC8);	//bswap eax
	}
	add_label("end");
	//PS0(frD) = PS1(frD) = MAKE(float, eax);
	//The x87 unit cannot load from a general register, so eax is spilled to
	//the dword just below the stack pointer and loaded from there.
	AB(0x89); AB(0x44); AB(0x24); AB(-4);	//mov [esp - 4], eax
	AB(0xD9); AB(0x44); AB(0x24); AB(-4);	//fld m32real [esp - 4]

	SET_FPR_SINGLE;

	//Update step: r.gpr[rA] += r.gpr[rB];
	AB(0xA1); AD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
	AB(0x01); AB(0x05); AD(&r.gpr[rA]);	//add r.gpr[rA], eax
}

//lhzux: load halfword and zero with update, indexed.
//Emits code that loads the zero-extended halfword at r.gpr[rA] + r.gpr[rB]
//into r.gpr[rD] and then adds r.gpr[rB] to r.gpr[rA].
//Throws bad_form_exception(Invalid) when rA == 0 or rA == rD.
void Recompiler::_lhzux() {
	D_rD; D_rA; D_rB;
	if(rA == 0 || rA == rD)
		throw bad_form_exception(bad_form_exception::Invalid);

	//When rB is also the destination, its old value must survive the load
	//so that the update step still adds the original index.
	const bool indexIsDest = (rB == rD);

	//r.gpr[rD] = m.rh(r.gpr[rA] + r.gpr[rB]);
	//Presumably leaves the address in eax and branches to "no_hardware" for plain memory.
	add_mem1_x(rA, rB, false);
	{	//hardware path: eax = h.rh(eax)
		AB(0x50);	//push eax
		AB(0xB9); AD(&h);	//mov ecx, &h //the this pointer
		AB(0xE8);	//call h.rh	//thiscall assumed
		WORD (Hardware::*temp)(WORD) = &Hardware::rh;
		REQUEST_CALL(MAKE(DWORD, temp));
		//The upper half of eax is not guaranteed to be zero after the call, so clear it.
		AB(0x0F); AB(0xB7); AB(0xC0);	//movzx eax, ax

		add_mem2();
	}
	{	//direct path: eax = swaph([eax])
		add_label("no_hardware");
		ADD_BYTE(0x0F); ADD_BYTE(0xB7); ADD_BYTE(0x00);	//movzx eax, word [eax]
		ADD_BYTE(0x86); ADD_BYTE(0xC4);	//xchg al, ah
	}
	add_label("end");
	if(indexIsDest) {
		//DWORD temp = r.gpr[rB];
		AB(0x8B); AB(0x1D); AD(&r.gpr[rB]);	//mov ebx, r.gpr[rB]
	}
	ADD_BYTE(0xA3); ADD_DWORD(&r.gpr[rD]);	//mov r.gpr[rD], eax

	if(!indexIsDest) {
		//r.gpr[rA] += r.gpr[rB];
		AB(0xA1); ADD_DWORD(&r.gpr[rB]);	//mov eax, r.gpr[rB]
		AB(0x01); AB(0x05); AD(&r.gpr[rA]);	//add r.gpr[rA], eax
	} else {
		//r.gpr[rA] += temp;
		AB(0x01); AB(0x1D); AD(&r.gpr[rA]);	//add r.gpr[rA], ebx
	}
}

//psq_lx: paired-single quantized load, indexed.
//Emits a thiscall to quantized_load_W0 or quantized_load_W1 (selected by W
//at recompile time) with the arguments (frD, rA_0 + r.gpr[rB], I).
//edi is pushed before the call and popped again afterwards.
void Recompiler::_psq_lx() {
	D_frD; D_rA; D_rB; D_W; D_I;

	//(W ? quantized_load_W1 : quantized_load_W0)(frD, rA_0 + r.gpr[rB], I);
	AB(0x57);	//push edi

	//Arguments are pushed last to first.
	AB(0x68); AD(I);	//push I
	if(rA != 0) {
		AB(0xA1); AD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
		ADD_BYTE(0x03); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//add eax, r.gpr[rB]
		AB(0x50);	//push eax
	} else {
		//rA_0 is zero, so the address is just r.gpr[rB]
		AB(0xFF); AB(0x35); AD(&r.gpr[rB]);	//push r.gpr[rB]
	}
	AB(0x68); AD(frD);	//push frD

	AB(0xB9); AD(this);	//mov ecx, this //the this pointer
	AB(0xE8);	//call quantized_load	//thiscall assumed
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_load_W1 : &Recompiler::quantized_load_W0;
	REQUEST_CALL(MAKE(DWORD, temp));

	AB(0x5F);	//pop edi
}

//psq_stx: paired-single quantized store, indexed.
//Emits a thiscall to quantized_store_W0 or quantized_store_W1 (selected by W
//at recompile time) with the arguments (rA_0 + r.gpr[rB], I, frS).
//edi is pushed before the call and popped again afterwards.
void Recompiler::_psq_stx() {
	D_frS; D_rA; D_rB; D_W; D_I;

	//(W ? quantized_store_W1 : quantized_store_W0)(rA_0 + r.gpr[rB], I, frS);
	AB(0x57);	//push edi

	//Arguments are pushed last to first.
	AB(0x68); AD(frS);	//push frS
	AB(0x68); AD(I);	//push I
	if(rA != 0) {
		AB(0xA1); AD(&r.gpr[rA]);	//mov eax, r.gpr[rA]
		ADD_BYTE(0x03); ADD_BYTE(0x05); ADD_DWORD(&r.gpr[rB]);	//add eax, r.gpr[rB]
		AB(0x50);	//push eax
	} else {
		//rA_0 is zero, so the address is just r.gpr[rB]
		AB(0xFF); AB(0x35); AD(&r.gpr[rB]);	//push r.gpr[rB]
	}

	AB(0xB9); AD(this);	//mov ecx, this //the this pointer
	AB(0xE8);	//call W ? quantized_store_W1 : quantized_store_W0	//thiscall assumed
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_store_W1 : &Recompiler::quantized_store_W0;
	REQUEST_CALL(MAKE(DWORD, temp));

	AB(0x5F);	//pop edi
}

//psq_lux: paired-single quantized load with update, indexed.
//Emits a thiscall to quantized_load_W0 or quantized_load_W1 (selected by W
//at recompile time) with the arguments (frD, r.gpr[rA] + r.gpr[rB], I), and
//then adds r.gpr[rB] to r.gpr[rA].
//edi is pushed before the call and popped again afterwards.
//Throws bad_form_exception(Invalid) when rA == 0.
void Recompiler::_psq_lux() {
	D_frD; D_rA; D_rB; D_W; D_I;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	/*if(!W)
	quantized_load_W0(frD, r.gpr[rA] + r.gpr[rB], I);
	else
	quantized_load_W1(frD, r.gpr[rA] + r.gpr[rB], I);*/
	//push edi
	AB(0x57);
	//push I
	AB(0x68);
	AD(I);
	//rA != 0 is guaranteed by the check above, so the address is always rA + rB.
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//add eax, r.gpr[rB]
	ADD_BYTE(0x03);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	//push eax
	AB(0x50);
	//push frD
	AB(0x68);
	AD(frD);
	//mov ecx, this //the this pointer
	AB(0xB9);
	AD(this);
	//call quantized_load	//thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_load_W1 : &Recompiler::quantized_load_W0;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);

	//r.gpr[rA] += r.gpr[rB];
	//mov eax, r.gpr[rB]
	AB(0xA1);
	AD(&r.gpr[rB]);
	//add r.gpr[rA], eax
	AB(0x01);
	AB(0x05);
	AD(&r.gpr[rA]);
}

//psq_stux: paired-single quantized store with update, indexed.
//Emits a thiscall to quantized_store_W0 or quantized_store_W1 (selected by W
//at recompile time) with the arguments (r.gpr[rA] + r.gpr[rB], I, frS), and
//then adds r.gpr[rB] to r.gpr[rA].
//edi is pushed before the call and popped again afterwards.
//Throws bad_form_exception(Invalid) when rA == 0.
void Recompiler::_psq_stux() {
	D_frS; D_rA; D_rB; D_W; D_I;
	if(rA == 0)
		throw bad_form_exception(bad_form_exception::Invalid);

	/*if(!W)
	quantized_store_W0(r.gpr[rA] + r.gpr[rB], I, frS);
	else
	quantized_store_W1(r.gpr[rA] + r.gpr[rB], I, frS);*/
	//push edi
	AB(0x57);
	//push frS
	AB(0x68);
	AD(frS);
	//push I
	AB(0x68);
	AD(I);
	//rA != 0 is guaranteed by the check above, so the address is always rA + rB.
	//mov eax, r.gpr[rA]
	AB(0xA1);
	AD(&r.gpr[rA]);
	//add eax, r.gpr[rB]
	ADD_BYTE(0x03);
	ADD_BYTE(0x05);
	ADD_DWORD(&r.gpr[rB]);
	//push eax
	AB(0x50);
	//mov ecx, this //the this pointer
	AB(0xB9);
	AD(this);
	//call W ? quantized_store_W1 : quantized_store_W0	//thiscall assumed
	AB(0xE8);
	void (Recompiler::*temp)(DWORD, DWORD, DWORD) =
		W ? &Recompiler::quantized_store_W1 : &Recompiler::quantized_store_W0;
	REQUEST_CALL(MAKE(DWORD, temp));
	//pop edi
	AB(0x5F);

	//r.gpr[rA] += r.gpr[rB];
	//mov eax, r.gpr[rB]
	AB(0xA1);
	AD(&r.gpr[rB]);
	//add r.gpr[rA], eax
	AB(0x01);
	AB(0x05);
	AD(&r.gpr[rA]);
}

//mtsr: move to segment register.
//Emits code that copies r.gpr[rS] into r.sr[SR] through eax.
void Recompiler::_mtsr() {
	D_SR; D_rS;

	//r.sr[SR] = r.gpr[rS];
	AB(0xA1); AD(&r.gpr[rS]);	//mov eax, r.gpr[rS]
	AB(0xA3); AD(&r.sr[SR]);	//mov r.sr[SR], eax
}

//ps_abs: paired-single absolute value.
//Emits x87 code that stores fabs() of each slot of frB into the matching
//slot of frD. Throws bad_form_exception(Unemulated) for the record form (Rc).
void Recompiler::_ps_abs() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//PS0(frD) = fabs(PS0(frB));
	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)
	AB(0xD9); AB(0xE1);	//fabs
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = fabs(PS1(frB));
	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
	AB(0xD9); AB(0xE1);	//fabs
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//ps_div: paired-single divide.
//Emits x87 code computing PS0(frD) = PS0(frA) / PS0(frB) and
//PS1(frD) = PS1(frA) / PS1(frB). add_set_fpscr_fprf() is emitted for the
//PS0 result only, while it is still on the x87 stack.
//Throws bad_form_exception(Unemulated) for the record form (Rc).
//
//Encoding note: the memory-operand divide is DC /6 (ModRM reg field = 6).
//The previous ModRM bytes 0x63 and 0x25 had reg field 4, which is FSUB, so
//the generated code subtracted instead of dividing.
void Recompiler::_ps_div() {
	D_frD; D_frA; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	SET_DISP8;

	//r.set_fpscr_fprf(PS0(frD) = PS0(frA) / PS0(frB));
	//fld PS0(frA)
	AB(0xDD);
	AB(0x43);
	ADD_DISP8(&PS0(frA));
	//fdiv PS0(frB) //DC /6, [ebx + disp8]
	AB(0xDC);
	AB(0x73);
	ADD_DISP8(&PS0(frB));

	add_set_fpscr_fprf(); //We need to do this before the stack is popped and we lose the data

	//fstp PS0(frD)
	AB(0xDD);
	AB(0x5B);
	ADD_DISP8(&PS0(frD));

	//PS1(frD) = PS1(frA) / PS1(frB);
	//fld PS1(frA)
	AB(0xDD);
	AB(0x05);
	AD(&PS1(frA));
	//fdiv PS1(frB) //DC /6, [disp32]
	AB(0xDC);
	AB(0x35);
	AD(&PS1(frB));
	//fstp PS1(frD)
	AB(0xDD);
	AB(0x1D);
	AD(&PS1(frD));
}

//ps_nabs: paired-single negative absolute value.
//Emits x87 code that stores -fabs() of each slot of frB into the matching
//slot of frD. Throws bad_form_exception(Unemulated) for the record form (Rc).
void Recompiler::_ps_nabs() {
	D_frD; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//PS0(frD) = -fabs(PS0(frB));
	AB(0xDD); AB(0x05); AD(&PS0(frB));	//fld PS0(frB)
	AB(0xD9); AB(0xE1);	//fabs
	AB(0xD9); AB(0xE0);	//fchs
	AB(0xDD); AB(0x1D); AD(&PS0(frD));	//fstp PS0(frD)

	//PS1(frD) = -fabs(PS1(frB));
	AB(0xDD); AB(0x05); AD(&PS1(frB));	//fld PS1(frB)
	AB(0xD9); AB(0xE1);	//fabs
	AB(0xD9); AB(0xE0);	//fchs
	AB(0xDD); AB(0x1D); AD(&PS1(frD));	//fstp PS1(frD)
}

//ps_sel: paired-single select.
//Emits x87 code computing, for each slot i in {0, 1}:
//  PSi(frD) = (PSi(frA) >= 0.0) ? PSi(frC) : PSi(frB);
//No FPSCR update is emitted.
//Throws bad_form_exception(Unemulated) for the record form (Rc).
//
//Each slot is tested with "fldz; fcomp qword PSi(frA); fnstsw ax", which
//compares 0.0 (ST0) against frA:
//  0.0 > frA  -> C3, C2, C0 all clear  -> select frB
//  unordered  -> C3, C2, C0 all set    -> select frB (NaN >= 0.0 is false)
//  otherwise  -> select frC
//The compare uses the 64-bit form (DC /3) because the slots are accessed as
//qwords everywhere else in this function; the 32-bit form (D8 /3) would
//interpret the low half of the double as a float.
void Recompiler::_ps_sel() {
	D_frD; D_frA; D_frC; D_frB; D_Rc;
	if(Rc)
		throw bad_form_exception(bad_form_exception::Unemulated);

	//---- slot 0 ----
	{
		//fldz
		AB(0xD9);
		AB(0xEE);
		//fcomp qword PS0(frA)
		AB(0xDC);
		AB(0x1D);
		AD(&PS0(frA));
		//fnstsw ax
		AB(0xDF);
		AB(0xE0);
		//test ah, 0x45 //C3 | C2 | C0
		AB(0xF6);
		AB(0xC4);
		AB(0x45);
	}

	//jz less0 //0.0 > PS0(frA)
	AB(0x74);
	request_label("less0");
	//test ah, 0x04 //C2: set only when the compare was unordered (NaN)
	AB(0xF6);
	AB(0xC4);
	AB(0x04);
	//jnz less0 //!(PS0(frA) >= 0.0)
	AB(0x75);
	request_label("less0");
	{ //more_or_equal:
		//fld PS0(frC)
		AB(0xDD);
		AB(0x05);
		AD(&PS0(frC));
		//jmp copy0
		AB(0xEB);
		request_label("copy0");
	}
	{
		add_label("less0");
		//fld PS0(frB)
		AB(0xDD);
		AB(0x05);
		AD(&PS0(frB));
	}
	add_label("copy0");
	//fstp PS0(frD)
	AB(0xDD);
	AB(0x1D);
	AD(&PS0(frD));

	//---- slot 1 ----
	{
		//fldz
		AB(0xD9);
		AB(0xEE);
		//fcomp qword PS1(frA)
		AB(0xDC);
		AB(0x1D);
		AD(&PS1(frA));
		//fnstsw ax
		AB(0xDF);
		AB(0xE0);
		//test ah, 0x45 //C3 | C2 | C0
		AB(0xF6);
		AB(0xC4);
		AB(0x45);
	}

	//jz less1 //0.0 > PS1(frA)
	AB(0x74);
	request_label("less1");
	//test ah, 0x04 //C2: set only when the compare was unordered (NaN)
	AB(0xF6);
	AB(0xC4);
	AB(0x04);
	//jnz less1 //!(PS1(frA) >= 0.0)
	AB(0x75);
	request_label("less1");
	{ //more_or_equal:
		//fld PS1(frC)
		AB(0xDD);
		AB(0x05);
		AD(&PS1(frC));
		//jmp copy1
		AB(0xEB);
		request_label("copy1");
	}
	{
		add_label("less1");
		//fld PS1(frB)
		AB(0xDD);
		AB(0x05);
		AD(&PS1(frB));
	}
	add_label("copy1");
	//fstp PS1(frD)
	AB(0xDD);
	AB(0x1D);
	AD(&PS1(frD));
}

//tlbsync: no code is emitted for this opcode; the recompiled block treats it as a no-op.
void Recompiler::_tlbsync() {
}

//mcrfs: move to condition register from FPSCR.
//Emits code that extracts the 4-bit FPSCR field crfS, positions it in the
//high nibble of al and hands it to add_setal2cr(crfD).
void Recompiler::_mcrfs() {
	D_crfD; D_crfS;

	//r.setcr(crfD, getbitsw(r.fpscr, crfS*4, crfS*4+3));
	AB(0xA1); AD(&r.fpscr);	//mov eax, r.fpscr

	//The field must end up in bits 4..7 of eax (the high nibble of al, not the low).
	if(crfS != 7) {
		//shr eax, 4 * (6 - crfS)
		AB(0xC1); AB(0xE8);
		AB(4 * (6 - crfS));
	} else {
		//Field 7 sits in the low nibble, so it has to move up instead.
		//shl eax, 4
		AB(0xC1); AB(0xE0);
		AB(4);
	}
	AB(0x24); AB(0xF0);	//and al, 0xF0

	add_setal2cr(crfD);
}

//crand: condition register AND.
//Emits code that sets CR bit crbD to (bit crbA && bit crbB).
//CR bit n is addressed as x86 bit (31 - n) of r.cr.
void Recompiler::_crand() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), getbit(r.cr, crbA) && getbit(r.cr, crbB));
	AB(0xA1); AD(&r.cr);	//mov eax, r.cr

	//bt eax, 31-crbA
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbA);
	AB(0x73);	//jnc clear
	request_label("clear");

	//bt eax, 31-crbB
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbB);
	AB(0x72);	//jc set
	request_label("set");

	{	//result is false
		add_label("clear");
		//btr eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xF0);
		AB((BYTE)31 - crbD);
		AB(0xEB);	//jmp store
		request_label("store");
	}
	{	//result is true
		add_label("set");
		//bts eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xE8);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	AB(0xA3); AD(&r.cr);	//mov r.cr, eax
}

//crandc: condition register AND with complement.
//Emits code that sets CR bit crbD to (bit crbA && !bit crbB).
//CR bit n is addressed as x86 bit (31 - n) of r.cr.
void Recompiler::_crandc() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), getbit(r.cr, crbA) && !getbit(r.cr, crbB));
	//optimizations are possible
	AB(0xA1); AD(&r.cr);	//mov eax, r.cr

	//bt eax, 31-crbA
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbA);
	AB(0x73);	//jnc clear
	request_label("clear");

	//bt eax, 31-crbB
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbB);
	AB(0x73);	//jnc set
	request_label("set");

	{	//result is false
		add_label("clear");
		//btr eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xF0);
		AB((BYTE)31 - crbD);
		AB(0xEB);	//jmp store
		request_label("store");
	}
	{	//result is true
		add_label("set");
		//bts eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xE8);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	AB(0xA3); AD(&r.cr);	//mov r.cr, eax
}

//crnand: condition register NAND.
//Emits code that sets CR bit crbD to !(bit crbA && bit crbB).
//CR bit n is addressed as x86 bit (31 - n) of r.cr.
void Recompiler::_crnand() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), !(getbit(r.cr, crbA) && getbit(r.cr, crbB)));
	AB(0xA1); AD(&r.cr);	//mov eax, r.cr

	//bt eax, 31-crbA
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbA);
	AB(0x73);	//jnc set
	request_label("set");

	//bt eax, 31-crbB
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbB);
	AB(0x73);	//jnc set
	request_label("set");

	{ //clear: both source bits were set, so the result is false
		//btr eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xF0);
		AB((BYTE)31 - crbD);
		AB(0xEB);	//jmp store
		request_label("store");
	}
	{	//result is true
		add_label("set");
		//bts eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xE8);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	AB(0xA3); AD(&r.cr);	//mov r.cr, eax
}

//crorc: condition register OR with complement.
//Emits code that sets CR bit crbD to (bit crbA || !bit crbB).
//CR bit n is addressed as x86 bit (31 - n) of r.cr.
void Recompiler::_crorc() {
	D_crbD; D_crbA; D_crbB;

	//setflags(r.cr, makeflag(crbD), getbit(r.cr, crbA) || !getbit(r.cr, crbB));
	AB(0xA1); AD(&r.cr);	//mov eax, r.cr

	//bt eax, 31-crbA
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbA);
	AB(0x72);	//jc set
	request_label("set");

	//bt eax, 31-crbB
	AB(0x0F); AB(0xBA); AB(0xE0);
	AB((BYTE)31 - crbB);
	AB(0x73);	//jnc set
	request_label("set");

	{ //clear: crbA was clear and crbB was set, so the result is false
		//btr eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xF0);
		AB((BYTE)31 - crbD);
		AB(0xEB);	//jmp store
		request_label("store");
	}
	{	//result is true
		add_label("set");
		//bts eax, 31-crbD
		AB(0x0F); AB(0xBA); AB(0xE8);
		AB((BYTE)31 - crbD);
	}

	add_label("store");
	AB(0xA3); AD(&r.cr);	//mov r.cr, eax
}

//Emits a thiscall to Recompiler::dsp_quick_execute() on this recompiler instance.
//edi is pushed before the call and popped again afterwards.
void Recompiler::__dsp_quick_execute() {
	//Generated code is equivalent to: this->dsp_quick_execute();
	AB(0x57);	//push edi
	AB(0xB9); AD(this);	//mov ecx, this
	AB(0xE8);	//call dsp_quick_execute //thiscall assumed
	void (Recompiler::*temp)() = &Recompiler::dsp_quick_execute;
	REQUEST_CALL(MAKE(DWORD, temp));
	AB(0x5F);	//pop edi
}
